Re: [PATCH] Add FMADDSUB and FMSUBADD SLP vectorization patterns and optabs

2021-07-07 Thread Hongtao Liu via Gcc-patches
> > > > and I have no easy way to test things there.  Handling AVX512
> > > > should be easy as followup though.

Here's the patch adding avx512f tests for FMADDSUB/FMSUBADD slp patterns.
Pushed to the trunk.


-- 
BR,
Hongtao
From 2dc666974cca3a62686f4d7135ca36c25d61a802 Mon Sep 17 00:00:00 2001
From: liuhongt 
Date: Wed, 7 Jul 2021 15:19:42 +0800
Subject: [PATCH] [i386] Add avx512 tests for FMADDSUB and FMSUBADD SLP
 vectorization patterns.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx512f-vect-fmaddsubXXXpd.c: New test.
	* gcc.target/i386/avx512f-vect-fmaddsubXXXps.c: New test.
	* gcc.target/i386/avx512f-vect-fmsubaddXXXpd.c: New test.
	* gcc.target/i386/avx512f-vect-fmsubaddXXXps.c: New test.
---
 .../i386/avx512f-vect-fmaddsubXXXpd.c | 41 +++
 .../i386/avx512f-vect-fmaddsubXXXps.c | 50 +++
 .../i386/avx512f-vect-fmsubaddXXXpd.c | 41 +++
 .../i386/avx512f-vect-fmsubaddXXXps.c | 50 +++
 4 files changed, 182 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vect-fmaddsubXXXpd.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vect-fmaddsubXXXps.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vect-fmsubaddXXXpd.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vect-fmsubaddXXXps.c

diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vect-fmaddsubXXXpd.c b/gcc/testsuite/gcc.target/i386/avx512f-vect-fmaddsubXXXpd.c
new file mode 100644
index 000..734f9e01443
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vect-fmaddsubXXXpd.c
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O3 -mfma -save-temps -mavx512f -mprefer-vector-width=512" } */
+
+#include "fma-check.h"
+void __attribute__((noipa))
+check_fmaddsub (double * __restrict a, double *b, double *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+{
+  a[8*i + 0] = b[8*i + 0] * c[8*i + 0] - a[8*i + 0];
+  a[8*i + 1] = b[8*i + 1] * c[8*i + 1] + a[8*i + 1];
+  a[8*i + 2] = b[8*i + 2] * c[8*i + 2] - a[8*i + 2];
+  a[8*i + 3] = b[8*i + 3] * c[8*i + 3] + a[8*i + 3];
+  a[8*i + 4] = b[8*i + 4] * c[8*i + 4] - a[8*i + 4];
+  a[8*i + 5] = b[8*i + 5] * c[8*i + 5] + a[8*i + 5];
+  a[8*i + 6] = b[8*i + 6] * c[8*i + 6] - a[8*i + 6];
+  a[8*i + 7] = b[8*i + 7] * c[8*i + 7] + a[8*i + 7];
+}
+}
+
+static void
+fma_test (void)
+{
+  if (!__builtin_cpu_supports ("avx512f"))
+return;
+  double a[8], b[8], c[8];
+  for (int i = 0; i < 8; ++i)
+{
+  a[i] = i;
+  b[i] = 3*i;
+  c[i] = 7*i;
+}
+  check_fmaddsub (a, b, c, 1);
+  const double d[8] = { 0., 22., 82., 192., 332., 530., 750., 1036.};
+  for (int i = 0; i < 8; ++i)
+if (a[i] != d[i])
+  __builtin_abort ();
+}
+
+/* { dg-final { scan-assembler {(?n)fmaddsub...pd[ \t].*%zmm[0-9]} } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vect-fmaddsubXXXps.c b/gcc/testsuite/gcc.target/i386/avx512f-vect-fmaddsubXXXps.c
new file mode 100644
index 000..ae196c5ef48
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vect-fmaddsubXXXps.c
@@ -0,0 +1,50 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -save-temps" } */
+
+#include "fma-check.h"
+void __attribute__((noipa))
+check_fmaddsub (float * __restrict a, float *b, float *c, int n)
+{
+  for (int i = 0; i < n; ++i)
+{
+  a[16*i + 0] = b[16*i + 0] * c[16*i + 0] - a[16*i + 0];
+  a[16*i + 1] = b[16*i + 1] * c[16*i + 1] + a[16*i + 1];
+  a[16*i + 2] = b[16*i + 2] * c[16*i + 2] - a[16*i + 2];
+  a[16*i + 3] = b[16*i + 3] * c[16*i + 3] + a[16*i + 3];
+  a[16*i + 4] = b[16*i + 4] * c[16*i + 4] - a[16*i + 4];
+  a[16*i + 5] = b[16*i + 5] * c[16*i + 5] + a[16*i + 5];
+  a[16*i + 6] = b[16*i + 6] * c[16*i + 6] - a[16*i + 6];
+  a[16*i + 7] = b[16*i + 7] * c[16*i + 7] + a[16*i + 7];
+  a[16*i + 8] = b[16*i + 8] * c[16*i + 8] - a[16*i + 8];
+  a[16*i + 9] = b[16*i + 9] * c[16*i + 9] + a[16*i + 9];
+  a[16*i + 10] = b[16*i + 10] * c[16*i + 10] - a[16*i + 10];
+  a[16*i + 11] = b[16*i + 11] * c[16*i + 11] + a[16*i + 11];
+  a[16*i + 12] = b[16*i + 12] * c[16*i + 12] - a[16*i + 12];
+  a[16*i + 13] = b[16*i + 13] * c[16*i + 13] + a[16*i + 13];
+  a[16*i + 14] = b[16*i + 14] * c[16*i + 14] - a[16*i + 14];
+  a[16*i + 15] = b[16*i + 15] * c[16*i + 15] + a[16*i + 15];
+}
+}
+
+static void
+fma_test (void)
+{
+  if (!__builtin_cpu_supports ("avx512f"))
+return;
+  float a[16], b[16], c[16];
+  for (int i = 0; i < 16; ++i)
+{
+  a[i] = i;
+  b[i] = 3*i;
+  c[i] = 7*i;
+}
+  check_fmaddsub (a, b, c, 1);
+  const float d[16] = { 0., 22., 82., 192., 332., 530., 750., 1036.,
+			1336, 1710., 2090., 2552., 3012., 3562., 4102., 4740.};
+  for (int i = 0; i < 16; ++i)
+if (a[i] != d[i])
+  __builtin_abort ();
+}
+
+/* { dg-final

Re: [PING][PATCH] define auto_vec copy ctor and assignment (PR 90904)

2021-07-07 Thread Richard Biener via Gcc-patches
On Tue, Jul 6, 2021 at 5:06 PM Martin Sebor  wrote:
>
> Ping: https://gcc.gnu.org/pipermail/gcc-patches/2021-June/573968.html
>
> Any questions/suggestions on the final patch or is it okay to commit?

I don't remember seeing one (aka saying "bootstrapped/tested, OK to commit?"
or so) - and the link above doesn't have one.

So, can you re-post it please?

Thanks,
Richard.

> On 6/29/21 7:46 PM, Martin Sebor wrote:
> > On 6/29/21 4:58 AM, Richard Biener wrote:
> >> On Mon, Jun 28, 2021 at 8:07 PM Martin Sebor  wrote:
> >>>
> >>> On 6/28/21 2:07 AM, Richard Biener wrote:
>  On Sat, Jun 26, 2021 at 12:36 AM Martin Sebor  wrote:
> >
> > On 6/25/21 4:11 PM, Jason Merrill wrote:
> >> On 6/25/21 4:51 PM, Martin Sebor wrote:
> >>> On 6/1/21 3:38 PM, Jason Merrill wrote:
>  On 6/1/21 3:56 PM, Martin Sebor wrote:
> > On 5/27/21 2:53 PM, Jason Merrill wrote:
> >> On 4/27/21 11:52 AM, Martin Sebor via Gcc-patches wrote:
> >>> On 4/27/21 8:04 AM, Richard Biener wrote:
>  On Tue, Apr 27, 2021 at 3:59 PM Martin Sebor 
>  wrote:
> >
> > On 4/27/21 1:58 AM, Richard Biener wrote:
> >> On Tue, Apr 27, 2021 at 2:46 AM Martin Sebor via Gcc-patches
> >>  wrote:
> >>>
> >>> PR 90904 notes that auto_vec is unsafe to copy and assign
> >>> because
> >>> the class manages its own memory but doesn't define (or
> >>> delete)
> >>> either special function.  Since I first ran into the
> >>> problem,
> >>> auto_vec has grown a move ctor and move assignment from
> >>> a dynamically-allocated vec but still no copy ctor or copy
> >>> assignment operator.
> >>>
> >>> The attached patch adds the two special functions to
> >>> auto_vec
> >>> along
> >>> with a few simple tests.  It makes auto_vec safe to use in
> >>> containers
> >>> that expect copyable and assignable element types and passes
> >>> bootstrap
> >>> and regression testing on x86_64-linux.
> >>
> >> The question is whether we want such uses to appear since
> >> those
> >> can be quite inefficient?  Thus the option is to delete those
> >> operators?
> >
> > I would strongly prefer the generic vector class to have the
> > properties
> > expected of any other generic container: copyable and
> > assignable.  If
> > we also want another vector type with this restriction I
> > suggest
> > to add
> > another "noncopyable" type and make that property explicit in
> > its name.
> > I can submit one in a followup patch if you think we need one.
> 
>  I'm not sure (and not strictly against the copy and assign).
>  Looking around
>  I see that vec<> does not do deep copying.  Making
>  auto_vec<> do it
>  might be surprising (I added the move capability to match
>  how vec<>
>  is used - as "reference" to a vector)
> >>>
> >>> The vec base classes are special: they have no ctors at all
> >>> (because
> >>> of their use in unions).  That's something we might have to
> >>> live with
> >>> but it's not a model to follow in ordinary containers.
> >>
> >> I don't think we have to live with it anymore, now that we're
> >> writing C++11.
> >>
> >>> The auto_vec class was introduced to fill the need for a
> >>> conventional
> >>> sequence container with a ctor and dtor.  The missing copy
> >>> ctor and
> >>> assignment operators were an oversight, not a deliberate
> >>> feature.
> >>> This change fixes that oversight.
> >>>
> >>> The revised patch also adds a copy ctor/assignment to the
> >>> auto_vec
> >>> primary template (that's also missing it).  In addition, it adds
> >>> a new class called auto_vec_ncopy that disables copying and
> >>> assignment as you prefer.
> >>
> >> Hmm, adding another class doesn't really help with the confusion
> >> richi mentions.  And many uses of auto_vec will pass them as vec,
> >> which will still do a shallow copy.  I think it's probably better
> >> to disable the copy special members for auto_vec until we fix
> >> vec<>.
> >
> > There are at least a couple of problems that get in the way of
> > fixing
> > all of vec to act like a well-behaved C++ container:
> >
> > 1) The embedded vec has a trailing "flexible" array member with
> > its
> > instan

Re: PING 2 [PATCH] correct handling of variable offset minus constant in -Warray-bounds (PR 100137)

2021-07-07 Thread Richard Biener via Gcc-patches
On Tue, Jul 6, 2021 at 5:47 PM Martin Sebor via Gcc-patches
 wrote:
>
> Ping: https://gcc.gnu.org/pipermail/gcc-patches/2021-June/573349.html

+  if (TREE_CODE (axstype) != UNION_TYPE)

what about QUAL_UNION_TYPE?  (why constrain union type accesses
here - note you don't seem to constrain accesses of union members here)

+if (tree access_size = TYPE_SIZE_UNIT (axstype))

+  /* The byte size of the array has already been determined above
+ based on a pointer ARG.  Set ELTSIZE to the size of the type
+ it points to and REFTYPE to the array with the size, rounded
+ down as necessary.  */
+  if (POINTER_TYPE_P (reftype))
+reftype = TREE_TYPE (reftype);
+  if (TREE_CODE (reftype) == ARRAY_TYPE)
+reftype = TREE_TYPE (reftype);
+  if (tree refsize = TYPE_SIZE_UNIT (reftype))
+if (TREE_CODE (refsize) == INTEGER_CST)
+  eltsize = wi::to_offset (refsize);

probably pre-existing but the pointer indirection is definitely confusing
me again and again given the variable is named 'reftype' - obviously
an access to a pointer does not have any element size.  Possibly the
paths arriving here ensure somehow that the only case is when
reftype is not the access type but a pointer to the accessed memory.
"jump-threading" the source might help me avoid tripping over this
again and again ...

The patch removes a lot of odd code, I like that.  You know this code best
and it's hard to spot errors.

So OK, you'll deal with the fallout.

Thanks,
Richard.

> On 6/28/21 1:33 PM, Martin Sebor wrote:
> > Ping: https://gcc.gnu.org/pipermail/gcc-patches/2021-June/573349.html
> >
> > On 6/21/21 4:25 PM, Martin Sebor wrote:
> >> -Warray-bounds relies on similar logic as -Wstringop-overflow et al.,
> >> but using its own algorithm, including its own bugs such as PR 100137.
> >> The attached patch takes the first step toward unifying the logic
> >> between the warnings.  It changes a subset of -Warray-bounds to call
> >> compute_objsize() to detect out-of-bounds indices.  Besides fixing
> >> the bug this also nicely simplifies the code and improves
> >> the consistency between the informational messages printed by both
> >> classes of warnings.
> >>
> >> The changes to the test suite are extensive mainly because of
> >> the different format of the diagnostics resulting from slightly
> >> tighter bounds of offsets computed by the new algorithm, and in
> >> smaller part because the change lets -Warray-bounds diagnose some
> >> problems it previously missed due to the limitations of its own
> >> solution.
> >>
> >> The false positive reported in PR 100137 is a 10/11/12 regression
> >> but this change is too intrusive to backport.  I have a smaller
> >> and more targeted patch I plan to backport in its stead.
> >>
> >> Tested on x86_64-linux.
> >>
> >> Martin
> >
>


ping: [PATCH] c-family: Add more predefined macros for math flags

2021-07-07 Thread Matthias Kretz
OK? (I want to use the macros in libstdc++.)

On Wednesday, 30 June 2021 10:59:28 CEST Matthias Kretz wrote:
> Library code, especially in headers, sometimes needs to know how the
> compiler interprets / optimizes floating-point types and operations.
> This information can be used for additional optimizations or for
> ensuring correctness. This change makes -freciprocal-math,
> -fno-signed-zeros, -fno-trapping-math, -fassociative-math, and
> -frounding-math report their state via corresponding pre-defined macros.
> 
> Signed-off-by: Matthias Kretz 
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/associative-math-1.c: New test.
>   * gcc.dg/associative-math-2.c: New test.
>   * gcc.dg/no-signed-zeros-1.c: New test.
>   * gcc.dg/no-signed-zeros-2.c: New test.
>   * gcc.dg/no-trapping-math-1.c: New test.
>   * gcc.dg/no-trapping-math-2.c: New test.
>   * gcc.dg/reciprocal-math-1.c: New test.
>   * gcc.dg/reciprocal-math-2.c: New test.
>   * gcc.dg/rounding-math-1.c: New test.
>   * gcc.dg/rounding-math-2.c: New test.
> 
> gcc/c-family/ChangeLog:
> 
>   * c-cppbuiltin.c (c_cpp_builtins_optimize_pragma): Define or
>   undefine __RECIPROCAL_MATH__, __NO_SIGNED_ZEROS__,
>   __NO_TRAPPING_MATH__, __ASSOCIATIVE_MATH__, and
>   __ROUNDING_MATH__ according to the new optimization flags.
> 
> gcc/ChangeLog:
> 
>   * cppbuiltin.c (define_builtin_macros_for_compilation_flags):
>   Define __RECIPROCAL_MATH__, __NO_SIGNED_ZEROS__,
>   __NO_TRAPPING_MATH__, __ASSOCIATIVE_MATH__, and
>   __ROUNDING_MATH__ according to their corresponding flags.
>   * doc/cpp.texi: Document __RECIPROCAL_MATH__,
>   __NO_SIGNED_ZEROS__, __NO_TRAPPING_MATH__, __ASSOCIATIVE_MATH__,
>   and __ROUNDING_MATH__.
> ---
>  gcc/c-family/c-cppbuiltin.c   | 25 +++
>  gcc/cppbuiltin.c  | 10 +
>  gcc/doc/cpp.texi  | 18 
>  gcc/testsuite/gcc.dg/associative-math-1.c | 17 +++
>  gcc/testsuite/gcc.dg/associative-math-2.c | 17 +++
>  gcc/testsuite/gcc.dg/no-signed-zeros-1.c  | 17 +++
>  gcc/testsuite/gcc.dg/no-signed-zeros-2.c  | 17 +++
>  gcc/testsuite/gcc.dg/no-trapping-math-1.c | 17 +++
>  gcc/testsuite/gcc.dg/no-trapping-math-2.c | 17 +++
>  gcc/testsuite/gcc.dg/reciprocal-math-1.c  | 17 +++
>  gcc/testsuite/gcc.dg/reciprocal-math-2.c  | 17 +++
>  gcc/testsuite/gcc.dg/rounding-math-1.c| 17 +++
>  gcc/testsuite/gcc.dg/rounding-math-2.c| 17 +++
>  13 files changed, 223 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/associative-math-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/associative-math-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/no-signed-zeros-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/no-signed-zeros-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/no-trapping-math-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/no-trapping-math-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/reciprocal-math-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/reciprocal-math-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/rounding-math-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/rounding-math-2.c


-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 std::experimental::simd  https://github.com/VcDevel/std-simd
──
diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c
index f79f939bd10..671af04b1f8 100644
--- a/gcc/c-family/c-cppbuiltin.c
+++ b/gcc/c-family/c-cppbuiltin.c
@@ -628,6 +628,31 @@ c_cpp_builtins_optimize_pragma (cpp_reader *pfile, tree prev_tree,
   cpp_undef (pfile, "__FINITE_MATH_ONLY__");
   cpp_define_unused (pfile, "__FINITE_MATH_ONLY__=0");
 }
+
+  if (!prev->x_flag_reciprocal_math && cur->x_flag_reciprocal_math)
+cpp_define_unused (pfile, "__RECIPROCAL_MATH__");
+  else if (prev->x_flag_reciprocal_math && !cur->x_flag_reciprocal_math)
+cpp_undef (pfile, "__RECIPROCAL_MATH__");
+
+  if (!prev->x_flag_signed_zeros && cur->x_flag_signed_zeros)
+cpp_undef (pfile, "__NO_SIGNED_ZEROS__");
+  else if (prev->x_flag_signed_zeros && !cur->x_flag_signed_zeros)
+cpp_define_unused (pfile, "__NO_SIGNED_ZEROS__");
+
+  if (!prev->x_flag_trapping_math && cur->x_flag_trapping_math)
+cpp_undef (pfile, "__NO_TRAPPING_MATH__");
+  else if (prev->x_flag_trapping_math && !cur->x_flag_trapping_math)
+cpp_define_unused (pfile, "__NO_TRAPPING_MATH__");
+
+  if (!prev->x_flag_associative_math && cur->x_flag_associative_math)
+cpp_define_unused (pfile, "__ASSOCIATIVE_MATH__");
+  else if (prev->x_flag_associative_math && !cur->x_flag_associative_

Re: [PATCH] PR tree-opt/40210: Fold (bswap(X)>>C1)&C2 to (X>>C3)&C2 in match.pd

2021-07-07 Thread Richard Biener via Gcc-patches
On Tue, Jul 6, 2021 at 9:01 PM Roger Sayle  wrote:
>
>
> All of the optimizations/transformations mentioned in bugzilla for
> PR tree-optimization/40210 are already implemented in mainline GCC,
> with one exception.  In comment #5, there's a suggestion that
> (bswap64(x)>>56)&0xff can be implemented without the bswap as
> (unsigned char)x, or equivalently x&0xff.
>
> This patch implements the above optimization, and closely related
> variants.  For any single bit, (bswap(X)>>C1)&1 can be simplified
> to (X>>C2)&1, where bit position C2 is the appropriate permutation
> of C1.  Similarly, the bswap can be eliminated if the desired set of
> bits all lie within the same byte, hence (bswap(x)>>8)&255 can
> always be optimized, as can (bswap(x)>>8)&123.
>
> Previously,
>
> int foo(long long x) {
>   return (__builtin_bswap64(x) >> 56) & 0xff;
> }
>
> compiled with -O2 to
> foo:movq%rdi, %rax
> bswap   %rax
> shrq$56, %rax
> ret
>
> with this patch, it now compiles to
> foo:movzbl  %dil, %eax
> ret
>
> This patch has been tested on x86_64-pc-linux-gnu with a "make
> bootstrap" and "make -k check" with no new failures.
>
> Ok for mainline?

I don't like get_builtin_precision too much, did you consider
simply using

+  (bit_and (convert1? (rshift@0 (convert2? (bswap@3 @1)) INTEGER_CST@2))

and TYPE_PRECISION (TREE_TYPE (@3))?  I think while we'll
see argument promotion and thus cannot use @1 to derive the type
the return value will be the original type.

Now, I see '8' being used which likely should be CHAR_TYPE_SIZE
since you also use char_type_node.

I wonder whether

+ /* (bswap(x) >> C1) & C2 can sometimes be simplified to (x >> C3) & C2.  */
+ (simplify
+  (bit_and (convert1? (rshift@0 (convert2? (bswap @1)) INTEGER_CST@2))
+  INTEGER_CST@3)

and

+ /* bswap(x) >> C1 can sometimes be simplified to (T)x >> C2.  */
+ (simplify
+  (rshift (convert? (bswap @0)) INTEGER_CST@1)

can build upon each other, for example by extending the latter
to handle more cases, transforming to ((T)x >> C2) & C3?
That might of course be only profitable when the bswap goes away.

Thanks,
Richard.

>
>
> 2021-07-06  Roger Sayle  
>
> gcc/ChangeLog
> PR tree-optimization/40210
> * builtins.c (get_builtin_precision): Helper function to determine
> the precision in bits of a built-in function.
> * builtins.h (get_builtin_precision): Prototype here.
> * match.pd (bswap optimizations): Simplify (bswap(x)>>C1)&C2 as
> (x>>C3)&C2 when possible.  Simplify bswap(x)>>C1 as ((T)x)>>C2
> when possible.  Simplify bswap(x)&C1 as (x>>C2)&C1 when 0<=C1<=255.
>
> gcc/testsuite/ChangeLog
> PR tree-optimization/40210
> * gcc.dg/builtin-bswap-13.c: New test.
> * gcc.dg/builtin-bswap-14.c: New test.
>
> Roger
> --
> Roger Sayle
> NextMove Software
> Cambridge, UK
>


Re: [PATCH] Add FMADDSUB and FMSUBADD SLP vectorization patterns and optabs

2021-07-07 Thread Richard Biener
On Wed, 7 Jul 2021, Hongtao Liu wrote:

> > > > > and I have no easy way to test things there.  Handling AVX512
> > > > > should be easy as followup though.
> 
> Here's the patch adding avx512f tests for FMADDSUB/FMSUBADD slp patterns.
> Pushed to the trunk.

Thanks!

Richard.


Re: [PATCH 1/2] CALL_INSN may not be a real function call.

2021-07-07 Thread Richard Biener via Gcc-patches
On Wed, Jul 7, 2021 at 4:40 AM Hongtao Liu via Gcc-patches
 wrote:
>
> On Tue, Jul 6, 2021 at 9:37 AM Hongtao Liu  wrote:
> >
> > On Tue, Jul 6, 2021 at 7:31 AM Segher Boessenkool
> >  wrote:
> > >
> > > Hi!
> > >
> > > I ran into this in shrink-wrap.c today.
> > >
> > > On Thu, Jun 03, 2021 at 02:54:07PM +0800, liuhongt via Gcc-patches wrote:
> > > > Use "used" flag for CALL_INSN to indicate it's a fake call. If it's a
> > > > fake call, it won't have its own function stack.
> > >
> > > Could you document somewhere what a "fake call" *is*?  Including what
> > > that means to RTL, how this is expected to be used, etc.?  In rtl.h is
> > fake call is used for TARGET_INSN_CALLEE_ABI, i'll add comments for
> > #define FAKE_CALL_P(RTX) in rtl.h
>
>
> Here's the patch I'm going to check in.
>
> Document FAKE_CALL_P in comments.
>
> gcc/ChangeLog:
>
> * rtl.h (FAKE_CALL_P): Add comments for FAKE_CALL_P.
>
> diff --git a/gcc/rtl.h b/gcc/rtl.h
> index 5ed0d6dd6fa..9afc60f08d8 100644
> --- a/gcc/rtl.h
> +++ b/gcc/rtl.h
> @@ -840,7 +840,13 @@ struct GTY(()) rtvec_def {
>  #define CALL_P(X) (GET_CODE (X) == CALL_INSN)
>
>  /* 1 if RTX is a call_insn for a fake call.
> -   CALL_INSN use "used" flag to indicate it's a fake call.  */
> +   CALL_INSN use "used" flag to indicate it's a fake call.
> +   Used by the x86 vzeroupper instruction,
> +   in order to solve the problem of partial clobber registers,
> +   vzeroupper is defined as a call_insn with a special callee_abi,
> +   but it is not a real call and therefore has no function stack
> +   of its own.

I think that's a bit vague - you could then say a sibling or tail call
to a function
that doesn't set up a stack frame is fake as well?  Maybe

 "CALL_INSN use "used" flag to indicate the instruction
  does not transfer control."

thus that this call is not affecting regular control flow? (it might
eventually still trap and thus cause non-call EH?)

Not sure if "no function stack of its own" is a good constraint,
vzeroupper does not perform any call or jump.

> +   NB: FAKE_CALL_P is not handled thoroughly in the RTL.  */
>  #define FAKE_CALL_P(RTX)\
>(RTL_FLAG_CHECK1 ("FAKE_CALL_P", (RTX), CALL_INSN)->used)
>
>
>
>
> --
> BR,
> Hongtao


Re: [PATCH] Add gnu::diagnose_as attribute

2021-07-07 Thread Matthias Kretz
On Tuesday, 22 June 2021 21:52:16 CEST Jason Merrill wrote:
> > 2. About the namespace aliases: IIUC an attribute would currently be
> > rejected because of the C++ grammar. Do you want to make it valid before
> > WG21 officially decides how to proceed? And if you have a pointer for me
> > where I'd have to adjust the grammar rules, that'd help. 
> 
> You will want to adjust cp_parser_namespace_alias_definition to handle
> attributes like cp_parser_namespace_definition.  The latter currently
> accepts attributes both before and after the name, which seems like a
> good pattern to follow so it doesn't matter which WG21 chooses.
> Probably best to pedwarn about C++11 attributes in both locations for
> now, not just after.

This introduces an ambiguity in cp_parser_declaration. The function has to 
decide whether to call cp_parser_namespace_definition or fall back to 
cp_parser_block_declaration (which calls 
cp_parser_namespace_alias_definition). But now the parser has to look ahead a 
lot farther:

namespace foo [[whatever]] {}
namespace bar [[whatever]] = foo;

I.e. only at '{' vs. '=' can cp_parser_declaration decide to call 
cp_parser_namespace_definition.

Consequently, should I really modify cp_parser_namespace_definition to handle 
namespace aliases? Or can/should cp_parser_declaration look ahead behind the 
attribute(s)? How?
With pedantic standard C++ it would be easy, since only these attribute 
placements are allowed:

namespace [[whatever]] foo {}
namespace bar [[whatever]] = foo;

-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 std::experimental::simd  https://github.com/VcDevel/std-simd
──


[PATCH] test/rs6000: Add cases to cover vector multiply

2021-07-07 Thread Kewen.Lin via Gcc-patches
Hi,

This patch is to add test cases to check if vectorizer
can exploit vector multiply instructions on Power, some
of them are supported since Power8, the others are newly
introduced by Power10.

Is it ok for trunk?

BR,
Kewen
-
gcc/testsuite/ChangeLog:

* gcc.target/powerpc/mul-vectorize-1.c: New test.
* gcc.target/powerpc/mul-vectorize-2.c: New test.
---
 .../gcc.target/powerpc/mul-vectorize-1.c  | 27 +++
 .../gcc.target/powerpc/mul-vectorize-2.c  | 27 +++
 2 files changed, 54 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/mul-vectorize-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/mul-vectorize-2.c

diff --git a/gcc/testsuite/gcc.target/powerpc/mul-vectorize-1.c 
b/gcc/testsuite/gcc.target/powerpc/mul-vectorize-1.c
new file mode 100644
index 000..ba01d5cec8f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mul-vectorize-1.c
@@ -0,0 +1,27 @@
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2 -ftree-vectorize 
-fno-vect-cost-model -fno-unroll-loops -fdump-tree-vect-details" } */
+
+/* Test vectorizer can exploit ISA 2.07 instruction vmuluwm (Vector Multiply
+   Unsigned Word Modulo) for both signed and unsigned word multiplication.  */
+
+#define N 128
+
+extern signed int si_a[N], si_b[N], si_c[N];
+extern unsigned int ui_a[N], ui_b[N], ui_c[N];
+
+__attribute__ ((noipa)) void
+test_si ()
+{
+  for (int i = 0; i < N; i++)
+si_c[i] = si_a[i] * si_b[i];
+}
+
+__attribute__ ((noipa)) void
+test_ui ()
+{
+  for (int i = 0; i < N; i++)
+ui_c[i] = ui_a[i] * ui_b[i];
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { scan-assembler-times {\mvmuluwm\M} 2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/mul-vectorize-2.c 
b/gcc/testsuite/gcc.target/powerpc/mul-vectorize-2.c
new file mode 100644
index 000..12ca97af409
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mul-vectorize-2.c
@@ -0,0 +1,27 @@
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize 
-fno-vect-cost-model -fno-unroll-loops -fdump-tree-vect-details" } */
+
+/* Test vectorizer can exploit ISA 3.1 instruction vmulld (Vector Multiply
+   Low Doubleword) for both signed and unsigned doubleword multiplication.  */
+
+#define N 128
+
+extern signed long long sd_a[N], sd_b[N], sd_c[N];
+extern unsigned long long ud_a[N], ud_b[N], ud_c[N];
+
+__attribute__ ((noipa)) void
+test_sd ()
+{
+  for (int i = 0; i < N; i++)
+sd_c[i] = sd_a[i] * sd_b[i];
+}
+
+__attribute__ ((noipa)) void
+test_ud ()
+{
+  for (int i = 0; i < N; i++)
+ud_c[i] = ud_a[i] * ud_b[i];
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { scan-assembler-times {\mvmulld\M} 2 } } */
-- 
2.17.1



[PATCH] test/rs6000: Add case to cover vector division

2021-07-07 Thread Kewen.Lin via Gcc-patches
Hi,

This patch is to add one test case to check if vectorizer
can exploit vector division instructions newly introduced
by Power10.

Is it ok for trunk?

BR,
Kewen
-
gcc/testsuite/ChangeLog:

* gcc.target/powerpc/div-vectorize-1.c: New test.
---
 .../gcc.target/powerpc/div-vectorize-1.c  | 46 +++
 1 file changed, 46 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/div-vectorize-1.c

diff --git a/gcc/testsuite/gcc.target/powerpc/div-vectorize-1.c 
b/gcc/testsuite/gcc.target/powerpc/div-vectorize-1.c
new file mode 100644
index 000..6208b2dc1f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/div-vectorize-1.c
@@ -0,0 +1,46 @@
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize 
-fno-vect-cost-model -fno-unroll-loops -fdump-tree-vect-details" } */
+
+/* Test vectorizer can exploit ISA 3.1 instructions Vector Divide
+   Signed/Unsigned Word/Doubleword for word/doubleword division.  */
+
+#define N 128
+
+extern signed int si_a[N], si_b[N], si_c[N];
+extern unsigned int ui_a[N], ui_b[N], ui_c[N];
+extern signed long long sd_a[N], sd_b[N], sd_c[N];
+extern unsigned long long ud_a[N], ud_b[N], ud_c[N];
+
+__attribute__ ((noipa)) void
+test_si ()
+{
+  for (int i = 0; i < N; i++)
+si_c[i] = si_a[i] / si_b[i];
+}
+
+__attribute__ ((noipa)) void
+test_ui ()
+{
+  for (int i = 0; i < N; i++)
+ui_c[i] = ui_a[i] / ui_b[i];
+}
+
+__attribute__ ((noipa)) void
+test_sd ()
+{
+  for (int i = 0; i < N; i++)
+sd_c[i] = sd_a[i] / sd_b[i];
+}
+
+__attribute__ ((noipa)) void
+test_ud ()
+{
+  for (int i = 0; i < N; i++)
+ud_c[i] = ud_a[i] / ud_b[i];
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+/* { dg-final { scan-assembler-times {\mvdivsw\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvdivuw\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvdivsd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvdivud\M} 1 } } */
-- 
2.17.1



[PATCH] rs6000: Support [u]mod<mode>3 for vector modulo insns

2021-07-07 Thread Kewen.Lin via Gcc-patches
Hi,

This patch is to make Power10 newly introduced vector
modulo instructions exploited in vectorized loops, it
just simply renames existing define_insns as standard
pattern names.

Is it ok for trunk?

BR,
Kewen
-
gcc/ChangeLog:

* config/rs6000/rs6000-builtin.def (MODS_V2DI, MODS_V4SI, MODU_V2DI,
MODU_V4SI): Adjust.
* config/rs6000/vsx.md (mods_<mode>): Renamed to...
(mod<mode>3): ... this.
(modu_<mode>): Renamed to...
(umod<mode>3): ... this.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/mod-vectorize.c: New test.
---
 gcc/config/rs6000/rs6000-builtin.def  |  8 ++--
 gcc/config/rs6000/vsx.md  |  4 +-
 .../gcc.target/powerpc/mod-vectorize.c| 46 +++
 3 files changed, 52 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/mod-vectorize.c

diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index d7ce4de421e..592efe31b04 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -3012,10 +3012,10 @@ BU_P10V_AV_2 (DIVS_V4SI, "vdivsw", CONST, divv4si3)
 BU_P10V_AV_2 (DIVS_V2DI, "vdivsd", CONST, divv2di3)
 BU_P10V_AV_2 (DIVU_V4SI, "vdivuw", CONST, udivv4si3)
 BU_P10V_AV_2 (DIVU_V2DI, "vdivud", CONST, udivv2di3)
-BU_P10V_AV_2 (MODS_V2DI, "vmodsd", CONST, mods_v2di)
-BU_P10V_AV_2 (MODS_V4SI, "vmodsw", CONST, mods_v4si)
-BU_P10V_AV_2 (MODU_V2DI, "vmodud", CONST, modu_v2di)
-BU_P10V_AV_2 (MODU_V4SI, "vmoduw", CONST, modu_v4si)
+BU_P10V_AV_2 (MODS_V2DI, "vmodsd", CONST, modv2di3)
+BU_P10V_AV_2 (MODS_V4SI, "vmodsw", CONST, modv4si3)
+BU_P10V_AV_2 (MODU_V2DI, "vmodud", CONST, umodv2di3)
+BU_P10V_AV_2 (MODU_V4SI, "vmoduw", CONST, umodv4si3)
 BU_P10V_AV_2 (MULHS_V2DI, "vmulhsd", CONST, mulhs_v2di)
 BU_P10V_AV_2 (MULHS_V4SI, "vmulhsw", CONST, mulhs_v4si)
 BU_P10V_AV_2 (MULHU_V2DI, "vmulhud", CONST, mulhu_v2di)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index f2260badf70..f622873d758 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -6333,7 +6333,7 @@ (define_insn "udiv3"
   [(set_attr "type" "vecdiv")
(set_attr "size" "")])
 
-(define_insn "mods_"
+(define_insn "mod3"
   [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
(mod:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
(match_operand:VIlong 2 "vsx_register_operand" "v")))]
@@ -6342,7 +6342,7 @@ (define_insn "mods_"
   [(set_attr "type" "vecdiv")
(set_attr "size" "")])
 
-(define_insn "modu_"
+(define_insn "umod3"
   [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
(umod:VIlong (match_operand:VIlong 1 "vsx_register_operand" "v")
 (match_operand:VIlong 2 "vsx_register_operand" "v")))]
diff --git a/gcc/testsuite/gcc.target/powerpc/mod-vectorize.c 
b/gcc/testsuite/gcc.target/powerpc/mod-vectorize.c
new file mode 100644
index 000..4d4f5cd6446
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/mod-vectorize.c
@@ -0,0 +1,46 @@
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize 
-fno-vect-cost-model -fno-unroll-loops -fdump-tree-vect-details" } */
+
+/* Test vectorizer can exploit ISA 3.1 instructions Vector Modulo
+   Signed/Unsigned Word/Doubleword for word/doubleword modulo operations.  */
+
+#define N 128
+
+extern signed int si_a[N], si_b[N], si_c[N];
+extern unsigned int ui_a[N], ui_b[N], ui_c[N];
+extern signed long long sd_a[N], sd_b[N], sd_c[N];
+extern unsigned long long ud_a[N], ud_b[N], ud_c[N];
+
+__attribute__ ((noipa)) void
+test_si ()
+{
+  for (int i = 0; i < N; i++)
+si_c[i] = si_a[i] % si_b[i];
+}
+
+__attribute__ ((noipa)) void
+test_ui ()
+{
+  for (int i = 0; i < N; i++)
+ui_c[i] = ui_a[i] % ui_b[i];
+}
+
+__attribute__ ((noipa)) void
+test_sd ()
+{
+  for (int i = 0; i < N; i++)
+sd_c[i] = sd_a[i] % sd_b[i];
+}
+
+__attribute__ ((noipa)) void
+test_ud ()
+{
+  for (int i = 0; i < N; i++)
+ud_c[i] = ud_a[i] % ud_b[i];
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+/* { dg-final { scan-assembler-times {\mvmodsw\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmoduw\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmodsd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvmodud\M} 1 } } */
-- 
2.17.1



Re: [PATCH 4/4] remove %G and %K support from pretty printer and -Wformat (PR 98512)

2021-07-07 Thread Andreas Schwab
This broke bootstrap on aarch64.

Andreas.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."


Re: [PATCH 4/4] remove %G and %K support from pretty printer and -Wformat (PR 98512)

2021-07-07 Thread Christophe Lyon via Gcc-patches
On Wed, Jul 7, 2021 at 11:38 AM Andreas Schwab 
wrote:

> This broke bootstrap on aarch64.
>
>
This is https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101363

Christophe



> Andreas.
>
> --
> Andreas Schwab, sch...@linux-m68k.org
> GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
> "And now for something completely different."
>


Re: [PATCH 4/4] remove %G and %K support from pretty printer and -Wformat (PR 98512)

2021-07-07 Thread Andreas Schwab
On Jul 07 2021, Christophe Lyon wrote:

> On Wed, Jul 7, 2021 at 11:38 AM Andreas Schwab 
> wrote:
>
>> This broke bootstrap on aarch64.
>>
>>
> This is https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101363

Nope.  This is a *bootstrap* failure.

Andreas.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."


[PATCH] tree-optimization/34195 - testcase for fixed vectorization

2021-07-07 Thread Richard Biener
This adds a testcase for an old fixed PR.

Tested on x86_64-unknown-linux-gnu, pushed.

2021-07-07  Richard Biener  

PR tree-optimization/34195
* gcc.dg/vect/pr34195.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr34195.c | 33 +
 1 file changed, 33 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr34195.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr34195.c 
b/gcc/testsuite/gcc.dg/vect/pr34195.c
new file mode 100644
index 000..e36950ba429
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr34195.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
+
+#define M 11
+
+struct S
+{
+  float x;
+  float y;
+} pS[100];
+
+float a[1000];
+float b[1000];
+
+void
+foo (int n)
+{
+  int i, j;
+
+  for (i = 0; i < n; i++)
+{
+  pS[i].x = 0;
+  pS[i].y = 0;
+
+  for (j = 0; j < M; j++)
+{
+  pS[i].x += (a[i]+b[i]);
+  pS[i].y += (a[i]-b[i]);
+}
+}
+}
+
+/* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" } } */
-- 
2.26.2


[PATCH] tree-optimization/99728 - improve LIM for loops with aggregate copies

2021-07-07 Thread Richard Biener
This improves LIM by recording aggregate copies for disambiguation
purposes instead of as UNANALYZABLE_MEM which will prevent any
invariant or store motion across it.  This allows four of the six
references in the loop of the testcase to be promoted.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2021-07-07  Richard Biener  

PR tree-optimization/99728
* tree-ssa-loop-im.c (gather_mem_refs_stmt): Record
aggregate copies.
(mem_refs_may_alias_p): Add assert we handled aggregate
copies elsewhere.
(sm_seq_valid_bb): Give up when running into aggregate copies.
(ref_indep_loop_p): Handle aggregate copies as never
being invariant themselves but allow other refs to be
disambiguated against them.
(can_sm_ref_p): Do not try to apply store-motion to aggregate
copies.

* g++.dg/opt/pr99728.C: New testcase.
---
 gcc/testsuite/g++.dg/opt/pr99728.C | 50 +
 gcc/tree-ssa-loop-im.c | 59 ++
 2 files changed, 102 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/opt/pr99728.C

diff --git a/gcc/testsuite/g++.dg/opt/pr99728.C 
b/gcc/testsuite/g++.dg/opt/pr99728.C
new file mode 100644
index 000..d4393231b4c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/pr99728.C
@@ -0,0 +1,50 @@
+// PR/99728
+// { dg-do compile }
+// { dg-options "-O2 -fdump-tree-lim2-details -w -Wno-psabi" }
+
+typedef double __m256d __attribute__((vector_size(sizeof (double) * 4)));
+extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
+_mm256_set1_pd (double __A)
+{
+  return __extension__ (__m256d){ __A, __A, __A, __A };
+}
+
+// simple OO wrapper around __m256d
+struct Tvsimple
+  {
+  __m256d v;
+  Tvsimple &operator+=(const Tvsimple &other) {v+=other.v; return *this;}
+  Tvsimple operator*(double val) const { Tvsimple res; res.v = 
v*_mm256_set1_pd(val); return res;}
+  Tvsimple operator*(Tvsimple val) const { Tvsimple res; res.v = v*val.v; 
return res; }
+  Tvsimple operator+(Tvsimple val) const { Tvsimple res; res.v = v+val.v; 
return res; }
+  Tvsimple operator+(double val) const { Tvsimple res; res.v = 
v+_mm256_set1_pd(val); return res;}
+  };
+
+template struct s0data_s
+  { vtype sth, corfac, scale, lam1, lam2, csq, p1r, p1i, p2r, p2i; };
+
+template void foo(s0data_s & __restrict__ d,
+  const double * __restrict__ coef, const double * __restrict__ alm,
+  unsigned long l, unsigned long il, unsigned long lmax)
+  {
+// critical loop
+  while (l<=lmax)
+{
+d.p1r += d.lam2*alm[2*l];
+d.p1i += d.lam2*alm[2*l+1];
+d.p2r += d.lam2*alm[2*l+2];
+d.p2i += d.lam2*alm[2*l+3];
+Tvsimple tmp = d.lam2*(d.csq*coef[2*il] + coef[2*il+1]) + d.lam1;
+d.lam1 = d.lam2;
+d.lam2 = tmp;
+++il; l+=2;
+}
+  }
+
+// this version has dead stores at the end of the loop
+template void foo<>(s0data_s & __restrict__ d,
+  const double * __restrict__ coef, const double * __restrict__ alm,
+  unsigned long l, unsigned long il, unsigned long lmax);
+
+// The aggregate copy in the IL should not prevent all store-motion
+// { dg-final { scan-tree-dump-times "Executing store motion" 4 "lim2" } }
diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
index 9ac390b9a4b..81b4ec21d6e 100644
--- a/gcc/tree-ssa-loop-im.c
+++ b/gcc/tree-ssa-loop-im.c
@@ -122,7 +122,9 @@ public:
   hashval_t hash;  /* Its hash value.  */
 
   /* The memory access itself and associated caching of alias-oracle
- query meta-data.  */
+ query meta-data.  We are using mem.ref == error_mark_node for the
+ case the reference is represented by its single access stmt
+ in accesses_in_loop[0].  */
   ao_ref mem;
 
   bitmap stored;   /* The set of loops in that this memory location
@@ -130,8 +132,7 @@ public:
   bitmap loaded;   /* The set of loops in that this memory location
   is loaded from.  */
   vec accesses_in_loop;
-   /* The locations of the accesses.  Vector
-  indexed by the loop number.  */
+   /* The locations of the accesses.  */
 
   /* The following set is computed on demand.  */
   bitmap_head dep_loop;/* The set of loops in that the memory
@@ -1465,7 +1466,22 @@ gather_mem_refs_stmt (class loop *loop, gimple *stmt)
 return;
 
   mem = simple_mem_ref_in_stmt (stmt, &is_stored);
-  if (!mem)
+  if (!mem && is_gimple_assign (stmt))
+{
+  /* For aggregate copies record distinct references but use them
+only for disambiguation purposes.  */
+  id = memory_accesses.refs_list.length ();
+  ref = mem_ref_alloc (NULL, 0, id);
+  memory_accesses.refs_list.safe_push (ref);
+  if (dump_file && (dump_flags & TDF_DETAILS))
+   {
+ fprintf (dump_file, "Unhandled memory reference %u: ", i

Re: [PATCH] testsuite: Add arm_arch_v7a_ok effective-target to pr57351.c

2021-07-07 Thread Christophe Lyon via Gcc-patches
ping?

On Wed, Jun 30, 2021 at 3:58 PM Christophe LYON via Gcc-patches <
gcc-patches@gcc.gnu.org> wrote:

> I've noticed that overriding cpu/arch flags when running the testsuite
> can cause this test to fail rather than being skipped because of
> incompatible flags combination.
>
> Since the test forces -march=armv7-a, make sure it is accepted in
> combination with the current runtestflags.
>
> 2021-06-30  Christophe Lyon  
>
>  gcc/testsuite/
>  * gcc.dg/debug/pr57351.c: Require arm_arch_v7a_ok
>  effective-target.
>
>
>
>


[PATCH v2] Analyze niter for until-wrap condition [PR101145]

2021-07-07 Thread Jiufu Guo via Gcc-patches
Changes since v1:
* Update assumptions for niter, add more test cases check
* Use widest_int/wide_int instead of mpz to do +-/
* Move some early check for quick return

For code like:
unsigned foo(unsigned val, unsigned start)
{
  unsigned cnt = 0;
  for (unsigned i = start; i > val; ++i)
cnt++;
  return cnt;
}

The number of iterations should be about UINT_MAX - start.

There is function adjust_cond_for_loop_until_wrap which
handles similar work for const bases.
Like adjust_cond_for_loop_until_wrap, this patch enhances
function number_of_iterations_cond/number_of_iterations_lt
to analyze number of iterations for this kind of loop.

Bootstrap and regtest pass on powerpc64le, x86_64 and aarch64.
Is this ok for trunk?

gcc/ChangeLog:

2021-07-07  Jiufu Guo  

PR tree-optimization/101145
* tree-ssa-loop-niter.c (number_of_iterations_until_wrap):
New function.
(number_of_iterations_lt): Invoke above function.
(adjust_cond_for_loop_until_wrap):
Merge to number_of_iterations_until_wrap.
(number_of_iterations_cond): Update invokes for
adjust_cond_for_loop_until_wrap and number_of_iterations_lt.

gcc/testsuite/ChangeLog:

2021-07-07  Jiufu Guo  

PR tree-optimization/101145
* gcc.dg/vect/pr101145.c: New test.
* gcc.dg/vect/pr101145.inc: New test.
* gcc.dg/vect/pr101145_1.c: New test.
* gcc.dg/vect/pr101145_2.c: New test.
* gcc.dg/vect/pr101145_3.c: New test.
* gcc.dg/vect/pr101145inf.c: New test.
* gcc.dg/vect/pr101145inf.inc: New test.
* gcc.dg/vect/pr101145inf_1.c: New test.
---
 gcc/testsuite/gcc.dg/vect/pr101145.c  | 187 ++
 gcc/testsuite/gcc.dg/vect/pr101145.inc|  63 
 gcc/testsuite/gcc.dg/vect/pr101145_1.c|  15 ++
 gcc/testsuite/gcc.dg/vect/pr101145_2.c|  15 ++
 gcc/testsuite/gcc.dg/vect/pr101145_3.c|  15 ++
 gcc/testsuite/gcc.dg/vect/pr101145inf.c   |  25 +++
 gcc/testsuite/gcc.dg/vect/pr101145inf.inc |  28 
 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c |  23 +++
 gcc/tree-ssa-loop-niter.c | 157 ++
 9 files changed, 463 insertions(+), 65 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.inc
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c 
b/gcc/testsuite/gcc.dg/vect/pr101145.c
new file mode 100644
index 000..74031b031cf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
@@ -0,0 +1,187 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#include 
+
+unsigned __attribute__ ((noinline))
+foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned)
+{
+  while (UINT_MAX - 64 < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  l = UINT_MAX - 32;
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  while (n <= ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{  // infininate 
+  while (0 <= ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  //no loop
+  l = UINT_MAX;
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  while (--l < n)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned)
+{
+  while (--l < 64)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  l = 32;
+  while (--l < n)
+*a++ = *b++ + 1;
+  return l;
+}
+
+
+int a[3200], b[3200];
+int fail;
+
+int
+main ()
+{
+  unsigned l, n;
+  unsigned res;
+  /* l > n*/
+  n = UINT_MAX - 64;
+  l = n + 32;
+  res = foo (a, b, l, n);
+  if (res != 0)
+fail++;
+
+  l = n;
+  res = foo (a, b, l, n);
+  if (res != 0)
+fail++;
+
+  l = n - 1;
+  res = foo (a, b, l, n);
+  if (res != l + 1)
+fail++;
+ 

Re: [PATCH] New hook adjust_iv_update_pos

2021-07-07 Thread Richard Biener via Gcc-patches
On Tue, Jun 29, 2021 at 11:19 AM Xionghu Luo  wrote:
>
>
>
> On 2021/6/28 16:25, Richard Biener wrote:
> > On Mon, Jun 28, 2021 at 10:07 AM Xionghu Luo  wrote:
> >>
> >>
> >>
> >> On 2021/6/25 18:02, Richard Biener wrote:
> >>> On Fri, Jun 25, 2021 at 11:41 AM Xionghu Luo  wrote:
> 
> 
> 
>  On 2021/6/25 16:54, Richard Biener wrote:
> > On Fri, Jun 25, 2021 at 10:34 AM Xionghu Luo via Gcc-patches
> >  wrote:
> >>
> >> From: Xiong Hu Luo 
> >>
> >> adjust_iv_update_pos in tree-ssa-loop-ivopts doesn't help performance
> >> on Power.  For example, it generates mismatched address offset after
> >> adjust iv update statement position:
> >>
> >>  [local count: 70988443]:
> >> _84 = MEM[(uint8_t *)ip_229 + ivtmp.30_414 * 1];
> >> ivtmp.30_415 = ivtmp.30_414 + 1;
> >> _34 = ref_180 + 18446744073709551615;
> >> _86 = MEM[(uint8_t *)_34 + ivtmp.30_415 * 1];
> >> if (_84 == _86)
> >>  goto ; [94.50%]
> >>  else
> >>  goto ; [5.50%]
> >>
> >> Disable it will produce:
> >>
> >>  [local count: 70988443]:
> >> _84 = MEM[(uint8_t *)ip_229 + ivtmp.30_414 * 1];
> >> _86 = MEM[(uint8_t *)ref_180 + ivtmp.30_414 * 1];
> >> ivtmp.30_415 = ivtmp.30_414 + 1;
> >> if (_84 == _86)
> >>  goto ; [94.50%]
> >>  else
> >>  goto ; [5.50%]
> >>
> >> Then later pass loop unroll could benefit from same address offset
> >> with different base address and reduces register dependency.
> >> This patch could improve performance by 10% for typical case on Power,
> >> no performance change observed for X86 or Aarch64 due to small loops
> >> not unrolled on these platforms.  Any comments?
> >
> > The case you quote is special in that if we hoisted the IV update before
> > the other MEM _also_ used in the condition it would be fine again.
> 
>  Thanks.  I tried to hoist the IV update statement before the first MEM 
>  (Fix 2), it
>  shows even worse performance due to not unroll(two more "base-1" is 
>  generated in gimple,
>  then loop->ninsns is 11 so small loops is not unrolled), change the 
>  threshold from
>  10 to 12 in rs6000_loop_unroll_adjust would make it also unroll 2 times, 
>  the
>  performance is SAME to the one that IV update statement in the *MIDDLE* 
>  (trunk).
>    From the ASM, we can see the index register %r4 is used in two 
>  iterations which
>  maybe a bottle neck for hiding instruction latency?
> 
>  Then it seems reasonable the performance would be better if keep the IV 
>  update
>  statement at *LAST* (Fix 1).
> 
>  (Fix 2):
>   [local count: 70988443]:
>  ivtmp.30_415 = ivtmp.30_414 + 1;
>  _34 = ip_229 + 18446744073709551615;
>  _84 = MEM[(uint8_t *)_34 + ivtmp.30_415 * 1];
>  _33 = ref_180 + 18446744073709551615;
>  _86 = MEM[(uint8_t *)_33 + ivtmp.30_415 * 1];
>  if (_84 == _86)
>    goto ; [94.50%]
>  else
>    goto ; [5.50%]
> 
> 
>  .L67:
>    lbzx %r12,%r24,%r4
>    lbzx %r25,%r7,%r4
>    cmpw %cr0,%r12,%r25
>    bne %cr0,.L11
>    mr %r26,%r4
>    addi %r4,%r4,1
>    lbzx %r12,%r24,%r4
>    lbzx %r25,%r7,%r4
>    mr %r6,%r26
>    cmpw %cr0,%r12,%r25
>    bne %cr0,.L11
>    mr %r26,%r4
>  .L12:
>    cmpdi %cr0,%r10,1
>    addi %r4,%r26,1
>    mr %r6,%r26
>    addi %r10,%r10,-1
>    bne %cr0,.L67
> 
> >
> > Now, adjust_iv_update_pos doesn't seem to check that the
> > condition actually uses the IV use stmt def, so it likely applies to
> > too many cases.
> >
> > Unfortunately the introducing rev didn't come with a testcase,
> > but still I think fixing up adjust_iv_update_pos is better than
> > introducing a way to short-cut it per target decision.
> >
> > One "fix" might be to add a check that either the condition
> > lhs or rhs is the def of the IV use and the other operand
> > is invariant.  Or if it's of similar structure hoist across the
> > other iv-use as well.  Not that I understand the argument
> > about the overlapping life-range.
> >
> > You also don't provide a complete testcase ...
> >
> 
>  Attached the test code, will also add it to the patch in a future version.
>  The issue comes from a very small hot loop:
> 
>    do {
>  len++;
>    } while(len < maxlen && ip[len] == ref[len]);
> >>>
> >>> unsigned int foo (unsigned char *ip, unsigned char *ref, unsigned int 
> >>> maxlen)
> >>> {
> >>> unsigned int len = 2;
> >>> do {
> >>> len++;
> >>> }while(len < maxlen && ip[len] == ref[len]);
> >>> 

[PATCH V2] gcc: Add vec_select -> subreg RTL simplification

2021-07-07 Thread Jonathan Wright via Gcc-patches
Hi,

Version 2 of this patch adds more code generation tests to show the
benefit of this RTL simplification as well as adding a new helper function
'rtx_vec_series_p' to reduce code duplication.

Patch tested as version 1 - ok for master?

Thanks,
Jonathan

---

gcc/ChangeLog:

2021-06-08  Jonathan Wright  

* combine.c (combine_simplify_rtx): Add vec_select -> subreg
simplification.
* config/aarch64/aarch64.md 
(*zero_extend2_aarch64):
Add Neon to general purpose register case for zero-extend
pattern.
* config/arm/vfp.md (*arm_movsi_vfp): Remove "*" from *t -> r
case to prevent some cases opting to go through memory.
* cse.c (fold_rtx): Add vec_select -> subreg simplification.
* rtl.c (rtx_vec_series_p): Define helper function to
determine whether RTX vector-selection indices are in series.
* rtl.h (rtx_vec_series_p): Define.
* simplify-rtx.c (simplify_context::simplify_binary_operation_1):
Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/extract_zero_extend.c: Remove dump scan
for RTL pattern match.
* gcc.target/aarch64/narrow_high_combine.c: Add new tests.
* gcc.target/aarch64/simd/vmulx_laneq_f64_1.c: Update
scan-assembler regex to look for a scalar register instead of
lane 0 of a vector.
* gcc.target/aarch64/simd/vmulx_laneq_f64_1.c: Likewise.
* gcc.target/aarch64/simd/vmulxd_laneq_f64_1.c: Likewise.
* gcc.target/aarch64/simd/vmulxs_lane_f32_1.c: Likewise.
* gcc.target/aarch64/simd/vmulxs_laneq_f32_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlalh_lane_s16.c: Likewise.
* gcc.target/aarch64/simd/vqdmlals_lane_s32.c: Likewise.
* gcc.target/aarch64/simd/vqdmlslh_lane_s16.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsls_lane_s32.c: Likewise.
* gcc.target/aarch64/simd/vqdmullh_lane_s16.c: Likewise.
* gcc.target/aarch64/simd/vqdmullh_laneq_s16.c: Likewise.
* gcc.target/aarch64/simd/vqdmulls_lane_s32.c: Likewise.
* gcc.target/aarch64/simd/vqdmulls_laneq_s32.c: Likewise.
* gcc.target/aarch64/sve/dup_lane_1.c: Likewise.
* gcc.target/aarch64/sve/live_1.c: Update scan-assembler regex
cases to look for 'b' and 'h' registers instead of 'w'.
* gcc.target/arm/mve/intrinsics/vgetq_lane_f16.c: Extract
lane 1 as the moves for lane 0 now get optimized away.
* gcc.target/arm/mve/intrinsics/vgetq_lane_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vgetq_lane_s16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vgetq_lane_s32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vgetq_lane_s8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vgetq_lane_u16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vgetq_lane_u32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vgetq_lane_u8.c: Likewise.



From: Jonathan Wright
Sent: 02 July 2021 10:53
To: gcc-patches@gcc.gnu.org 
Cc: Richard Sandiford ; Kyrylo Tkachov 

Subject: [PATCH] gcc: Add vec_select -> subreg RTL simplification 
 
Hi,

As subject, this patch adds a new RTL simplification for the case of a
VEC_SELECT selecting the low part of a vector. The simplification
returns a SUBREG.

The primary goal of this patch is to enable better combinations of
Neon RTL patterns - specifically allowing generation of 'write-to-
high-half' narrowing instructions.

Adding this RTL simplification means that the expected results for a
number of tests need to be updated:
* aarch64 Neon: Update the scan-assembler regex for intrinsics tests
  to expect a scalar register instead of lane 0 of a vector.
* aarch64 SVE: Likewise.
* arm MVE: Use lane 1 instead of lane 0 for lane-extraction
  intrinsics tests (as the move instructions get optimized away for
  lane 0.)

Regression tested and bootstrapped on aarch64-none-linux-gnu,
x86_64-unknown-linux-gnu, arm-none-linux-gnueabihf and
aarch64_be-none-linux-gnu - no issues.

Ok for master?

Thanks,
Jonathan

---

gcc/ChangeLog:

2021-06-08  Jonathan Wright  

    * combine.c (combine_simplify_rtx): Add vec_select -> subreg
    simplification.
    * config/aarch64/aarch64.md 
(*zero_extend2_aarch64):
    Add Neon to general purpose register case for zero-extend
    pattern.
    * config/arm/vfp.md (*arm_movsi_vfp): Remove "*" from *t -> r
    case to prevent some cases opting to go through memory.
    * cse.c (fold_rtx): Add vec_select -> subreg simplification.
    * simplify-rtx.c (simplify_context::simplify_binary_operation_1):
    Likewise.

gcc/testsuite/ChangeLog:

    * gcc.target/aarch64/extract_zero_extend.c: Remove dump scan
    for RTL pattern match.
    * gcc.target/aarch64/simd/vmulx_laneq_f64_1.c: Update
    scan-assembler regex to look for a scalar register instead of
    lane 0 of a vector.
    * gcc.target/aarch64/simd/vmulx_laneq_f64_1.c: Likewise.

Re: [PING][PATCH] define auto_vec copy ctor and assignment (PR 90904)

2021-07-07 Thread Martin Sebor via Gcc-patches

On 7/7/21 1:28 AM, Richard Biener wrote:

On Tue, Jul 6, 2021 at 5:06 PM Martin Sebor  wrote:


Ping: https://gcc.gnu.org/pipermail/gcc-patches/2021-June/573968.html

Any questions/suggestions on the final patch or is it okay to commit?


I don't remember seeing one (aka saying "bootstrapped/tested, OK to commit?"
or so) - and the link above doesn't have one.

So, can you re-post it please?


The patch is attached to the email above with the following text
at the end:

  Attached is a revised patch with these changes (a superset of
  those I sent in response to Jason's question), tested on x86_64.

I've also attached it to this reply.

Martin



Thanks,
Richard.


On 6/29/21 7:46 PM, Martin Sebor wrote:

On 6/29/21 4:58 AM, Richard Biener wrote:

On Mon, Jun 28, 2021 at 8:07 PM Martin Sebor  wrote:


On 6/28/21 2:07 AM, Richard Biener wrote:

On Sat, Jun 26, 2021 at 12:36 AM Martin Sebor  wrote:


On 6/25/21 4:11 PM, Jason Merrill wrote:

On 6/25/21 4:51 PM, Martin Sebor wrote:

On 6/1/21 3:38 PM, Jason Merrill wrote:

On 6/1/21 3:56 PM, Martin Sebor wrote:

On 5/27/21 2:53 PM, Jason Merrill wrote:

On 4/27/21 11:52 AM, Martin Sebor via Gcc-patches wrote:

On 4/27/21 8:04 AM, Richard Biener wrote:

On Tue, Apr 27, 2021 at 3:59 PM Martin Sebor 
wrote:


On 4/27/21 1:58 AM, Richard Biener wrote:

On Tue, Apr 27, 2021 at 2:46 AM Martin Sebor via Gcc-patches
 wrote:


PR 90904 notes that auto_vec is unsafe to copy and assign
because
the class manages its own memory but doesn't define (or
delete)
either special function.  Since I first ran into the
problem,
auto_vec has grown a move ctor and move assignment from
a dynamically-allocated vec but still no copy ctor or copy
assignment operator.

The attached patch adds the two special functions to
auto_vec
along
with a few simple tests.  It makes auto_vec safe to use in
containers
that expect copyable and assignable element types and passes
bootstrap
and regression testing on x86_64-linux.


The question is whether we want such uses to appear since
those
can be quite inefficient?  Thus the option is to delete those
operators?


I would strongly prefer the generic vector class to have the
properties
expected of any other generic container: copyable and
assignable.  If
we also want another vector type with this restriction I
suggest
to add
another "noncopyable" type and make that property explicit in
its name.
I can submit one in a followup patch if you think we need one.


I'm not sure (and not strictly against the copy and assign).
Looking around
I see that vec<> does not do deep copying.  Making
auto_vec<> do it
might be surprising (I added the move capability to match
how vec<>
is used - as "reference" to a vector)


The vec base classes are special: they have no ctors at all
(because
of their use in unions).  That's something we might have to
live with
but it's not a model to follow in ordinary containers.


I don't think we have to live with it anymore, now that we're
writing C++11.


The auto_vec class was introduced to fill the need for a
conventional
sequence container with a ctor and dtor.  The missing copy
ctor and
assignment operators were an oversight, not a deliberate
feature.
This change fixes that oversight.

The revised patch also adds a copy ctor/assignment to the
auto_vec
primary template (that's also missing it).  In addition, it adds
a new class called auto_vec_ncopy that disables copying and
assignment as you prefer.


Hmm, adding another class doesn't really help with the confusion
richi mentions.  And many uses of auto_vec will pass them as vec,
which will still do a shallow copy.  I think it's probably better
to disable the copy special members for auto_vec until we fix
vec<>.


There are at least a couple of problems that get in the way of
fixing
all of vec to act like a well-behaved C++ container:

1) The embedded vec has a trailing "flexible" array member with
its
instances having different size.  They're initialized by memset
and
copied by memcpy.  The class can't have copy ctors or assignments
but it should disable/delete them instead.

2) The heap-based vec is used throughout GCC with the
assumption of
shallow copy semantics (not just as function arguments but also as
members of other such POD classes).  This can be changed by
providing
copy and move ctors and assignment operators for it, and also for
some of the classes in which it's a member and that are used with
the same assumption.

3) The heap-based vec::block_remove() assumes its elements are
PODs.
That breaks in VEC_ORDERED_REMOVE_IF (used in gcc/dwarf2cfi.c:2862
and tree-vect-patterns.c).

I took a stab at both and while (1) is easy, (2) is shaping up to
be a big and tricky project.  Tricky because it involves using
std::move in places where what's moved is subsequently still used.
I can keep plugging away at it but it won't change the fact that
the embedded and heap-based vecs have different requirements.

It doesn't seem to me that having a safely copyable auto_vec needs
to 

Re: [PATCH 2/4 REVIEW] libtool.m4: fix nm BSD flag detection

2021-07-07 Thread Nick Clifton via Gcc-patches

Hi Nick,


Ping?


Oops.


PR libctf/27482
* libtool.m4 (LT_PATH_NM): Try BSDization flags with a user-provided


Changes to libtool need to be posted to the libtool project:

  https://www.gnu.org/software/libtool/

They have mailing lists for bug reports and patch submissions.

Once the patch has been accepted there it can be backported to the gcc and
gdb/binutils repositories...

Cheers
  Nick



Re: [PATCH 1/2] CALL_INSN may not be a real function call.

2021-07-07 Thread Segher Boessenkool
Hi!

On Wed, Jul 07, 2021 at 10:15:08AM +0200, Richard Biener wrote:
> On Wed, Jul 7, 2021 at 4:40 AM Hongtao Liu via Gcc-patches
>  wrote:
> > On Tue, Jul 6, 2021 at 9:37 AM Hongtao Liu  wrote:
> > > On Tue, Jul 6, 2021 at 7:31 AM Segher Boessenkool
> > >  wrote:
> > > > I ran into this in shrink-wrap.c today.
> > > >
> > > > On Thu, Jun 03, 2021 at 02:54:07PM +0800, liuhongt via Gcc-patches 
> > > > wrote:
> > > > > Use "used" flag for CALL_INSN to indicate it's a fake call. If it's a
> > > > > fake call, it won't have its own function stack.
> > > >
> > > > Could you document somewhere what a "fake call" *is*?  Including what
> > > > that means to RTL, how this is expected to be used, etc.?  In rtl.h is
> > > fake call is used for TARGET_INSN_CALLEE_ABI, i'll add comments for
> > > #define FAKE_CALL_P(RTX) in rtl.h
> >
> >
> > Here's the patch I'm going to check in.

Which doesn't do any of the things I asked for :-(  It doesn't say what
a "fake call" is, it doesn't say what its semantics are, it doesn't say
how it is expected to be used.

So, a "FAKE_CALL" is very much a *real* call, on the RTL level, which is
where we are here.  But you want it to be treated differently because it
will eventually be replaced by different insns.

This causes all kinds of unrelated code to need confusing changes, made
much worse because the name "FAKE_CALL" is the opposite of what it does.

As long as your description of it only says how it is (ab)used in one
case, I will call it a hack, and a gross hack at that.


> > --- a/gcc/rtl.h
> > +++ b/gcc/rtl.h
> > @@ -840,7 +840,13 @@ struct GTY(()) rtvec_def {
> >  #define CALL_P(X) (GET_CODE (X) == CALL_INSN)
> >
> >  /* 1 if RTX is a call_insn for a fake call.
> > -   CALL_INSN use "used" flag to indicate it's a fake call.  */
> > +   CALL_INSN use "used" flag to indicate it's a fake call.
> > +   Used by the x86 vzeroupper instruction,
> > +   in order to solve the problem of partial clobber registers,
> > +   vzeroupper is defined as a call_insn with a special callee_abi,
> > +   but it is not a real call and therefore has no function stack
> > +   of its own.

So because of this one thing (you need to insert partial clobbers) you
force all kinds of unrelated code to have changes, namely, code that
needs to do something with calls, but now you do not want to have that
done on some calls because you promise that call will disappear
eventually, and it cannot cause any problems in the mean time?

I am not convinced.  This is not design, this is a terrible hack, this
is the opposite direction we should go in.

> that doesn't set up a stack frame is fake as well?  Maybe
> 
>  "CALL_INSN use "used" flag to indicate the instruction
>   does not transfer control."
> 
> thus that this call is not affecting regular control flow? (it might
> eventually still trap and thus cause non-call EH?)

How it is used in shrink-wrap requires it to not have a stack frame (in
the compiler sense).

> Not sure if "no function stack of its own" is a good constraint,
> vzeroupper does not perform any call or jump.

Yeah.  This stuff needs a rethink.

What is wrong with just using an unspec and clobbers?


Segher


Re: [PATCH 1/2] CALL_INSN may not be a real function call.

2021-07-07 Thread Segher Boessenkool
On Mon, Jul 05, 2021 at 06:03:21PM -0600, Jeff Law wrote:
> It reminds me a bit of millicode calls on the PA or calls to special 
> routines in libgcc.  They're calls to functions, but those functions do 
> not follow the standard ABI.

Something with CALL_INSN_FUNCTION_USAGE?  And maybe some clobbers?


Segher


[PATCH] soft-fp: Update soft-fp from glibc

2021-07-07 Thread H.J. Lu via Gcc-patches
From: liuhongt 

1. Add __extendhfdf2/__extendhfsf2 to return an IEEE half converted to
IEEE double/single.
2. Add __truncdfhf2/__truncsfhf2 to truncate IEEE double/single into
IEEE half.
3. Add __eqhf2/__nehf2 to return 0 if a == b and a,b are not NAN,
otherwise return 1.

These are needed by x86 _Float16.

* soft-fp/eqhf2.c: New file.
* soft-fp/extendhfdf2.c: Likewise.
* soft-fp/extendhfsf2.c: Likewise.
* soft-fp/truncdfhf2.c: Likewise.
* soft-fp/truncsfhf2.c: Likewise.
* soft-fp/half.h (FP_CMP_EQ_H): New.
---
 libgcc/soft-fp/eqhf2.c   | 49 +
 libgcc/soft-fp/extendhfdf2.c | 53 
 libgcc/soft-fp/extendhfsf2.c | 49 +
 libgcc/soft-fp/half.h|  2 ++
 libgcc/soft-fp/truncdfhf2.c  | 52 +++
 libgcc/soft-fp/truncsfhf2.c  | 48 
 6 files changed, 253 insertions(+)
 create mode 100644 libgcc/soft-fp/eqhf2.c
 create mode 100644 libgcc/soft-fp/extendhfdf2.c
 create mode 100644 libgcc/soft-fp/extendhfsf2.c
 create mode 100644 libgcc/soft-fp/truncdfhf2.c
 create mode 100644 libgcc/soft-fp/truncsfhf2.c

diff --git a/libgcc/soft-fp/eqhf2.c b/libgcc/soft-fp/eqhf2.c
new file mode 100644
index 000..6d6634e5c54
--- /dev/null
+++ b/libgcc/soft-fp/eqhf2.c
@@ -0,0 +1,49 @@
+/* Software floating-point emulation.
+   Return 0 iff a == b, 1 otherwise
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "soft-fp.h"
+#include "half.h"
+
+CMPtype
+__eqhf2 (HFtype a, HFtype b)
+{
+  FP_DECL_EX;
+  FP_DECL_H (A);
+  FP_DECL_H (B);
+  CMPtype r;
+
+  FP_INIT_EXCEPTIONS;
+  FP_UNPACK_RAW_H (A, a);
+  FP_UNPACK_RAW_H (B, b);
+  FP_CMP_EQ_H (r, A, B, 1);
+  FP_HANDLE_EXCEPTIONS;
+
+  return r;
+}
+
+strong_alias (__eqhf2, __nehf2);
diff --git a/libgcc/soft-fp/extendhfdf2.c b/libgcc/soft-fp/extendhfdf2.c
new file mode 100644
index 000..337ba791d48
--- /dev/null
+++ b/libgcc/soft-fp/extendhfdf2.c
@@ -0,0 +1,53 @@
+/* Software floating-point emulation.
+   Return an IEEE half converted to IEEE double
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define FP_NO_EXACT_UNDERFLOW
+#include "soft-fp.h"
+#include "half.h"
+#include "double.h"
+
+DFtype
+__extendhfdf2 (HFtype a)
+{
+  FP_DECL_EX;
+  FP_DECL_H (A);
+  FP_DECL_D (R);
+  DFtype r;
+
+  FP_INIT_EXCEPTIONS;
+  FP_UNPACK_RAW_H (A

Re: [PATCH 1/2] CALL_INSN may not be a real function call.

2021-07-07 Thread Hongtao Liu via Gcc-patches
On Wed, Jul 7, 2021 at 10:54 PM Segher Boessenkool
 wrote:
>
> Hi!
>
> On Wed, Jul 07, 2021 at 10:15:08AM +0200, Richard Biener wrote:
> > On Wed, Jul 7, 2021 at 4:40 AM Hongtao Liu via Gcc-patches
> >  wrote:
> > > On Tue, Jul 6, 2021 at 9:37 AM Hongtao Liu  wrote:
> > > > On Tue, Jul 6, 2021 at 7:31 AM Segher Boessenkool
> > > >  wrote:
> > > > > I ran into this in shrink-wrap.c today.
> > > > >
> > > > > On Thu, Jun 03, 2021 at 02:54:07PM +0800, liuhongt via Gcc-patches 
> > > > > wrote:
> > > > > > Use "used" flag for CALL_INSN to indicate it's a fake call. If it's 
> > > > > > a
> > > > > > fake call, it won't have its own function stack.
> > > > >
> > > > > Could you document somewhere what a "fake call" *is*?  Including what
> > > > > that means to RTL, how this is expected to be used, etc.?  In rtl.h is
> > > > fake call is used for TARGET_INSN_CALLEE_ABI, i'll add comments for
> > > > #define FAKE_CALL_P(RTX) in rtl.h
> > >
> > >
> > > Here's the patch I'm going to check in.
>
> Which doesn't do any of the things I asked for :-(  It doesn't say what
> a "fake call" is, it doesn't say what its semantics are, it doesn't say
> how it is expected to be used.
>
> So, a "FAKE_CALL" is very much a *real* call, on the RTL level, which is
> where we are here.  But you want it to be treated differently because it
> will eventually be replaced by different insns.
>
> This causes all kinds of unrelated code to need confusing changes, made
> much worse because the name "FAKE_CALL" is the opposite of what it does.
>
> As long as your description of it only says how it is (ab)used in one
> case, I will call it a hack, and a gross hack at that.
>
>
> > > --- a/gcc/rtl.h
> > > +++ b/gcc/rtl.h
> > > @@ -840,7 +840,13 @@ struct GTY(()) rtvec_def {
> > >  #define CALL_P(X) (GET_CODE (X) == CALL_INSN)
> > >
> > >  /* 1 if RTX is a call_insn for a fake call.
> > > -   CALL_INSN use "used" flag to indicate it's a fake call.  */
> > > +   CALL_INSN use "used" flag to indicate it's a fake call.
> > > +   Used by the x86 vzeroupper instruction,
> > > +   in order to solve the problem of partial clobber registers,
> > > +   vzeroupper is defined as a call_insn with a special callee_abi,
> > > +   but it is not a real call and therefore has no function stack
> > > +   of its own.
>
> So because of this one thing (you need to insert partial clobbers) you
> force all kinds of unrelated code to have changes, namely, code that
> needs to do something with calls, but now you do not want to have that
> done on some calls because you promise that call will disappear
> eventually, and it cannot cause any problems in the mean time?
>
> I am not convinced.  This is not design, this is a terrible hack, this
> is the opposite direction we should go in.
>
> > that doesn't set up a stack frame is fake as well?  Maybe
> >
> >  "CALL_INSN use "used" flag to indicate the instruction
> >   does not transfer control."
> >
> > thus that this call is not affecting regular control flow? (it might
> > eventually still trap and thus cause non-call EH?)
>
> How it is used in shrink-wrap requires it to not have a stack frame (in
> the compiler sense).
>
> > Not sure if "no function stack of its own" is a good constraint,
> > vzeroupper does not perform any call or jump.
>
> Yeah.  This stuff needs a rethink.
>
> What is wrong with just using an unspec and clobbers?
>
It's partial and **potential clobber**,  if we add unspec and clobbers
to the whole pack(8 or 16 xmm registers), it will force save/restore
of registers that aren't really needed in the function, especially for
64bit MS ABI where lower 128bit are preserved across function calls.
>
> Segher



-- 
BR,
Hongtao


[PATCH] c++: Fix PR101247 in another way

2021-07-07 Thread Patrick Palka via Gcc-patches
r12-1989 fixed the testcase in the PR, but unfortunately the fix is
buggy:

  1. It breaks the case where the common template between the
 TEMPLATE_DECL t and ctx_parms is the innermost template (as in
 concepts-memtmpl5.C below).  This can be fixed by instead
 passing the TREE_TYPE of ctmpl to common_enclosing_class when
 ctmpl is a class template.
  2. Even if that's fixed, the analogous case where the innermost
 template is a partial specialization is still broken (as in
 concepts-memtmpl5a.C below), because ctmpl is always the primary
 template.

So this patch instead fixes the general problem in a simpler way that
doesn't rely on ctx_parms at all: when looking for the template
parameters of a TEMPLATE_DECL that are shared with the current template
context, just walk its DECL_CONTEXT.  As long as the template is not
overly general (e.g. we didn't pass it through most_general_template),
this should give us exactly what we want, since if a TEMPLATE_DECL can
be referred to from some template context then the template parameters
it uses must all be in-scope and represented in its DECL_CONTEXT.  This
effectively makes us treat TEMPLATE_DECLs more similarly to other _DECLs
(whose DECL_CONTEXT we also walk).

Bootstrapped and regtested on x86_64-pc-linux-gnu, also tested on
cmcstl2 and range-v3, does this look OK for trunk/11?

PR c++/101247

gcc/cp/ChangeLog:

* pt.c (any_template_parm_r) <case TEMPLATE_DECL>: Just walk the
DECL_CONTEXT.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/concepts-memtmpl4.C: Uncomment the commented out
example, which we now handle correctly.
* g++.dg/cpp2a/concepts-memtmpl5.C: New test.
* g++.dg/cpp2a/concepts-memtmpl5a.C: New test.
---
 gcc/cp/pt.c | 14 +-
 gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl4.C  |  2 +-
 gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl5.C  | 11 +++
 gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl5a.C | 15 +++
 4 files changed, 32 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl5.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl5a.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 7e56ccfc45f..dc0f0b7b58e 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -10728,15 +10728,11 @@ any_template_parm_r (tree t, void *data)
   break;
 
 case TEMPLATE_DECL:
-  {
-   /* If T is a member template that shares template parameters with
-  ctx_parms, we need to mark all those parameters for mapping.  */
-   if (tree ctmpl = TREE_TYPE (INNERMOST_TEMPLATE_PARMS (ftpi->ctx_parms)))
- if (tree com = common_enclosing_class (DECL_CONTEXT (t),
-DECL_CONTEXT (ctmpl)))
-   if (tree ti = CLASSTYPE_TEMPLATE_INFO (com))
- WALK_SUBTREE (TI_ARGS (ti));
-  }
+  /* If T is a member template that shares template parameters with
+ctx_parms, we need to mark all those parameters for mapping.
+To that end, it should suffice to just walk the DECL_CONTEXT of
+the template (assuming the template is not overly general).  */
+  WALK_SUBTREE (DECL_CONTEXT (t));
   break;
 
 case LAMBDA_EXPR:
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl4.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl4.C
index 625149e5025..f990ae17859 100644
--- a/gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl4.C
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl4.C
@@ -24,5 +24,5 @@ int main()
 {
   A::B::f(0);
   A::C::f(0);
-  // A::C::g();
+  A::C::g();
 }
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl5.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl5.C
new file mode 100644
index 000..3c83bb88485
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl5.C
@@ -0,0 +1,11 @@
+// PR c++/101247
+// { dg-do compile { target concepts } }
+
+template struct A {
+  template static constexpr bool d = true;
+  static void g() requires d;
+};
+
+int main() {
+  A::g();
+}
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl5a.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl5a.C
new file mode 100644
index 000..458f1cdf856
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl5a.C
@@ -0,0 +1,15 @@
+// PR c++/101247
+// { dg-do compile { target concepts } }
+// A variant of concepts-memtmpl5.C that uses a partial specialization
+// of A instead of the primary template.
+
+template struct A;
+
+template requires true struct A {
+  template static constexpr bool d = true;
+  static void g() requires d;
+};
+
+int main() {
+  A::g();
+}
-- 
2.32.0.93.g670b81a890



Re: [PATCH 1/2] CALL_INSN may not be a real function call.

2021-07-07 Thread Hongtao Liu via Gcc-patches
On Wed, Jul 7, 2021 at 10:54 PM Segher Boessenkool
 wrote:
>
> Hi!
>
> On Wed, Jul 07, 2021 at 10:15:08AM +0200, Richard Biener wrote:
> > On Wed, Jul 7, 2021 at 4:40 AM Hongtao Liu via Gcc-patches
> >  wrote:
> > > On Tue, Jul 6, 2021 at 9:37 AM Hongtao Liu  wrote:
> > > > On Tue, Jul 6, 2021 at 7:31 AM Segher Boessenkool
> > > >  wrote:
> > > > > I ran into this in shrink-wrap.c today.
> > > > >
> > > > > On Thu, Jun 03, 2021 at 02:54:07PM +0800, liuhongt via Gcc-patches 
> > > > > wrote:
> > > > > > Use "used" flag for CALL_INSN to indicate it's a fake call. If it's 
> > > > > > a
> > > > > > fake call, it won't have its own function stack.
> > > > >
> > > > > Could you document somewhere what a "fake call" *is*?  Including what
> > > > > that means to RTL, how this is expected to be used, etc.?  In rtl.h is
> > > > fake call is used for TARGET_INSN_CALLEE_ABI, i'll add comments for
> > > > #define FAKE_CALL_P(RTX) in rtl.h
> > >
> > >
> > > Here's the patch I'm going to check in.
>
> Which doesn't do any of the things I asked for :-(  It doesn't say what
> a "fake call" is, it doesn't say what its semantics are, it doesn't say
> how it is expected to be used.
>
> So, a "FAKE_CALL" is very much a *real* call, on the RTL level, which is
> where we are here.  But you want it to be treated differently because it
> will eventually be replaced by different insns.
It's a CALL_INSN on the RTL level, but it's just a normal instruction
that doesn't have a call stack and doesn't affect the control
flow.
>
> This causes all kinds of unrelated code to need confusing changes, made
> much worse because the name "FAKE_CALL" is the opposite of what it does.
>
> As long as your description of it only says how it is (ab)used in one
> case, I will call it a hack, and a gross hack at that.
>
>
> > > --- a/gcc/rtl.h
> > > +++ b/gcc/rtl.h
> > > @@ -840,7 +840,13 @@ struct GTY(()) rtvec_def {
> > >  #define CALL_P(X) (GET_CODE (X) == CALL_INSN)
> > >
> > >  /* 1 if RTX is a call_insn for a fake call.
> > > -   CALL_INSN use "used" flag to indicate it's a fake call.  */
> > > +   CALL_INSN use "used" flag to indicate it's a fake call.
> > > +   Used by the x86 vzeroupper instruction,
> > > +   in order to solve the problem of partial clobber registers,
> > > +   vzeroupper is defined as a call_insn with a special callee_abi,
> > > +   but it is not a real call and therefore has no function stack
> > > +   of its own.
>
> So because of this one thing (you need to insert partial clobbers) you
> force all kinds of unrelated code to have changes, namely, code that
> needs to do something with calls, but now you do not want to have that
> done on some calls because you promise that call will disappear
> eventually, and it cannot cause any problems in the mean time?
>
> I am not convinced.  This is not design, this is a terrible hack, this
> is the opposite direction we should go in.

Quote from  https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570634.html

> Also i grep CALL_P or CALL_INSN in GCC source codes, there are many
> places which hold the assumption CALL_P/CALL_INSN is a real call.
> Considering that vzeroupper is used a lot on the i386 backend, I'm a
> bit worried that this implementation solution will be a bottomless
> pit.

Maybe, but I think the same is true for CLOBBER_HIGH.  If we have
a third alternative then we should consider it, but I think the
call approach is still going to be less problematic than CLOBBER_HIGH.

The main advantage of the call approach is that the CALL_P handling
is (mostly) conservatively correct and performance problems are just
a one-line change.  The CLOBBER_HIGH approach instead requires
changes to the way that passes track liveness information for
non-call instructions (so is much more than a one-line change).
Also, treating a CLOBBER_HIGH like a CLOBBER isn't conservatively
correct, because other code might be relying on part of the register
being preserved.

>
> > that doesn't set up a stack frame is fake as well?  Maybe
> >
> >  "CALL_INSN use "used" flag to indicate the instruction
> >   does not transfer control."
> >
> > thus that this call is not affecting regular control flow? (it might
> > eventually still trap and thus cause non-call EH?)
>
> How it is used in shrink-wrap requires it to not have a stack frame (in
> the compiler sense).
>
> > Not sure if "no function stack of its own" is a good constraint,
> > vzeroupper does not perform any call or jump.
>
> Yeah.  This stuff needs a rethink.
>
> What is wrong with just using an unspec and clobbers?
>
>
> Segher



-- 
BR,
Hongtao


Re: [PATCH] test/rs6000: Add cases to cover vector multiply

2021-07-07 Thread Segher Boessenkool
Hi!

On Wed, Jul 07, 2021 at 05:02:42PM +0800, Kewen.Lin wrote:
> This patch is to add test cases to check if vectorizer
> can exploit vector multiply instructions on Power, some
> of them are supported since Power8, the others are newly
> introduced by Power10.

Okay for trunk.  Thank you!


Segher


Re: [PATCH 1/2] CALL_INSN may not be a real function call.

2021-07-07 Thread Hongtao Liu via Gcc-patches
On Wed, Jul 7, 2021 at 4:15 PM Richard Biener
 wrote:
>
> On Wed, Jul 7, 2021 at 4:40 AM Hongtao Liu via Gcc-patches
>  wrote:
> >
> > On Tue, Jul 6, 2021 at 9:37 AM Hongtao Liu  wrote:
> > >
> > > On Tue, Jul 6, 2021 at 7:31 AM Segher Boessenkool
> > >  wrote:
> > > >
> > > > Hi!
> > > >
> > > > I ran into this in shrink-wrap.c today.
> > > >
> > > > On Thu, Jun 03, 2021 at 02:54:07PM +0800, liuhongt via Gcc-patches 
> > > > wrote:
> > > > > Use "used" flag for CALL_INSN to indicate it's a fake call. If it's a
> > > > > fake call, it won't have its own function stack.
> > > >
> > > > Could you document somewhere what a "fake call" *is*?  Including what
> > > > that means to RTL, how this is expected to be used, etc.?  In rtl.h is
> > > fake call is used for TARGET_INSN_CALLEE_ABI, i'll add comments for
> > > #define FAKE_CALL_P(RTX) in rtl.h
> >
> >
> > Here's the patch I'm going to check in.
> >
> > Document FAKE_CALL_P in comments.
> >
> > gcc/ChangeLog:
> >
> > * rtl.h (FAKE_CALL_P): Add comments for FAKE_CALL_P.
> >
> > diff --git a/gcc/rtl.h b/gcc/rtl.h
> > index 5ed0d6dd6fa..9afc60f08d8 100644
> > --- a/gcc/rtl.h
> > +++ b/gcc/rtl.h
> > @@ -840,7 +840,13 @@ struct GTY(()) rtvec_def {
> >  #define CALL_P(X) (GET_CODE (X) == CALL_INSN)
> >
> >  /* 1 if RTX is a call_insn for a fake call.
> > -   CALL_INSN use "used" flag to indicate it's a fake call.  */
> > +   CALL_INSN use "used" flag to indicate it's a fake call.
> > +   Used by the x86 vzeroupper instruction,
> > +   in order to solve the problem of partial clobber registers,
> > +   vzeroupper is defined as a call_insn with a special callee_abi,
> > +   but it is not a real call and therefore has no function stack
> > +   of its own.
>
> I think that's a bit vague - you could then say a sibling or tail call
> to a function
> that doesn't set up a stack frame is fake as well?  Maybe
>
>  "CALL_INSN use "used" flag to indicate the instruction
>   does not transfer control."
>
> thus that this call is not affecting regular control flow? (it might
> eventually still trap and thus cause non-call EH?)
>
> Not sure if "no function stack of its own" is a good constraint,
> vzeroupper does not perform any call or jump.
>
How about this?

 /* 1 if RTX is a call_insn for a fake call.
-   CALL_INSN use "used" flag to indicate it's a fake call.  */
+   CALL_INSN use "used" flag to indicate it's a fake call.
+   FIXME: it's only used by x86 vzeroupper to indicate it's
+   a normal instruction which doesn't have function stack
+   and doesn't impact control flow, and FAKE_CALL_P is not
+   handled thoroughly in the RTL, PR82735.  */

> > +   NB: FAKE_CALL_P is not handled thoroughly in the RTL.  */
> >  #define FAKE_CALL_P(RTX)\
> >(RTL_FLAG_CHECK1 ("FAKE_CALL_P", (RTX), CALL_INSN)->used)
> >
> >
> >
> >
> > --
> > BR,
> > Hongtao



-- 
BR,
Hongtao


[Ada] Unsynchronized access to a Boolean in tasking state

2021-07-07 Thread Pierre-Marie de Rodat
The Terminated flag for a task was being queried without first acquiring
the task lock. It is not clear that this unsynchronized access has ever
caused a problem in practice, but the thread-sanitizer tool flags it.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* libgnarl/s-tassta.adb (Free_Task): Acquire the Task_Lock
before, rather than after, querying the task's Terminated flag.
Add a corresponding Task_Unlock call.

diff --git a/gcc/ada/libgnarl/s-tassta.adb b/gcc/ada/libgnarl/s-tassta.adb
--- a/gcc/ada/libgnarl/s-tassta.adb
+++ b/gcc/ada/libgnarl/s-tassta.adb
@@ -910,12 +910,12 @@ package body System.Tasking.Stages is
   Self_Id : constant Task_Id := Self;
 
begin
+  Initialization.Task_Lock (Self_Id);
+
   if T.Common.State = Terminated then
 
  --  It is not safe to call Abort_Defer or Write_Lock at this stage
 
- Initialization.Task_Lock (Self_Id);
-
  Lock_RTS;
  Initialization.Finalize_Attributes (T);
  Initialization.Remove_From_All_Tasks_List (T);
@@ -930,6 +930,7 @@ package body System.Tasking.Stages is
  --  upon termination.
 
  T.Free_On_Termination := True;
+ Initialization.Task_Unlock (Self_Id);
   end if;
end Free_Task;
 




[Ada] Transient scope cleanup

2021-07-07 Thread Pierre-Marie de Rodat
Use Tbuild.Unchecked_Convert_To instead of
Nmake.Make_Unchecked_Type_Conversion. This leads to more readable source
code in the compiler, and also more readable .dg code, because it
removes redundant unchecked conversions.

There is only one remaining call to Make_Unchecked_Type_Conversion,
which is in Unchecked_Convert_To.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* checks.adb, exp_attr.adb, exp_ch4.adb, exp_ch6.adb,
exp_ch9.adb, exp_disp.adb, exp_util.adb, inline.adb,
sem_res.adb: Change all calls to Make_Unchecked_Type_Conversion
to call Unchecked_Convert_To instead. This involves removing
New_Occurrence_Of on the first parameter, because
Unchecked_Convert_To expects a type entity, rather than the name
of one. Also, removed calls to Relocate_Node, because
Unchecked_Convert_To takes care of that.
* sinfo.ads: Change comment to be worded more firmly.

diff --git a/gcc/ada/checks.adb b/gcc/ada/checks.adb
--- a/gcc/ada/checks.adb
+++ b/gcc/ada/checks.adb
@@ -7831,10 +7831,8 @@ package body Checks is
  New_Occurrence_Of (Target_Base_Type, Loc),
Constant_Present=> True,
Expression  =>
- Make_Unchecked_Type_Conversion (Loc,
-   Subtype_Mark =>
- New_Occurrence_Of (Target_Base_Type, Loc),
-   Expression   => Duplicate_Subexpr (N))),
+ Unchecked_Convert_To
+   (Target_Base_Type, Duplicate_Subexpr (N))),
 
  Make_Raise_Constraint_Error (Loc,
Condition =>


diff --git a/gcc/ada/exp_attr.adb b/gcc/ada/exp_attr.adb
--- a/gcc/ada/exp_attr.adb
+++ b/gcc/ada/exp_attr.adb
@@ -2799,10 +2799,9 @@ package body Exp_Attr is
 Name   =>
   New_Occurrence_Of (RTE (RE_Callable), Loc),
 Parameter_Associations => New_List (
-  Make_Unchecked_Type_Conversion (Loc,
-Subtype_Mark =>
-  New_Occurrence_Of (RTE (RO_ST_Task_Id), Loc),
-Expression   => Build_Disp_Get_Task_Id_Call (Pref);
+  Unchecked_Convert_To
+(RTE (RO_ST_Task_Id),
+ Build_Disp_Get_Task_Id_Call (Pref);
 
  else
 Rewrite (N, Build_Call_With_Task (Pref, RTE (RE_Callable)));
@@ -6746,10 +6745,9 @@ package body Exp_Attr is
 Name   =>
   New_Occurrence_Of (RTE (RE_Terminated), Loc),
 Parameter_Associations => New_List (
-  Make_Unchecked_Type_Conversion (Loc,
-Subtype_Mark =>
-  New_Occurrence_Of (RTE (RO_ST_Task_Id), Loc),
-Expression   => Build_Disp_Get_Task_Id_Call (Pref);
+  Unchecked_Convert_To
+(RTE (RO_ST_Task_Id),
+ Build_Disp_Get_Task_Id_Call (Pref);
 
  elsif Restricted_Profile then
 Rewrite (N,


diff --git a/gcc/ada/exp_ch4.adb b/gcc/ada/exp_ch4.adb
--- a/gcc/ada/exp_ch4.adb
+++ b/gcc/ada/exp_ch4.adb
@@ -11992,9 +11992,8 @@ package body Exp_Ch4 is
--  unchecked conversion to the target fixed-point type.
 
Conv :=
- Make_Unchecked_Type_Conversion (Loc,
-   Subtype_Mark => New_Occurrence_Of (Target_Type, Loc),
-   Expression   => New_Occurrence_Of (Expr_Id, Loc));
+ Unchecked_Convert_To
+   (Target_Type, New_Occurrence_Of (Expr_Id, Loc));
 end;
 
  --  All other conversions
@@ -12515,10 +12514,7 @@ package body Exp_Ch4 is
   Conv : Node_Id;
begin
   Make_Tag_Check (Class_Wide_Type (Actual_Targ_Typ));
-  Conv :=
-Make_Unchecked_Type_Conversion (Loc,
-  Subtype_Mark => New_Occurrence_Of (Target_Type, Loc),
-  Expression   => Relocate_Node (Expression (N)));
+  Conv := Unchecked_Convert_To (Target_Type, Expression (N));
   Rewrite (N, Conv);
   Analyze_And_Resolve (N, Target_Type);
end;


diff --git a/gcc/ada/exp_ch6.adb b/gcc/ada/exp_ch6.adb
--- a/gcc/ada/exp_ch6.adb
+++ b/gcc/ada/exp_ch6.adb
@@ -5852,11 +5852,9 @@ package body Exp_Ch6 is
  Name   =>
New_Occurrence_Of (Alloc_Obj_Id, Loc),
  Expression =>
-   Make_Unchecked_Type_Conversion (Loc,
- Subtype_Mark =>
-   New_Occurrence_Of (Ref_Type, Loc),
- Expression   =>
-   New_Occurrence_Of (Obj_Acc_Formal

[Ada] Simplify iteration over pending instantiations

2021-07-07 Thread Pierre-Marie de Rodat
Code cleanup; semantics is unaffected.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* inline.adb (Instantiate_Bodies): Fix whitespace in declaration.
(Remove_Dead_Instance): Change iteration from WHILE to FOR.

diff --git a/gcc/ada/inline.adb b/gcc/ada/inline.adb
--- a/gcc/ada/inline.adb
+++ b/gcc/ada/inline.adb
@@ -4806,7 +4806,7 @@ package body Inline is
  end if;
   end Instantiate_Body;
 
-  J, K  : Nat;
+  J, K : Nat;
   Info : Pending_Body_Info;
 
--  Start of processing for Instantiate_Bodies
@@ -5153,17 +5153,12 @@ package body Inline is
--
 
procedure Remove_Dead_Instance (N : Node_Id) is
-  J : Int;
-
begin
-  J := 0;
-  while J <= Pending_Instantiations.Last loop
+  for J in 0 .. Pending_Instantiations.Last loop
  if Pending_Instantiations.Table (J).Inst_Node = N then
 Pending_Instantiations.Table (J).Inst_Node := Empty;
 return;
  end if;
-
- J := J + 1;
   end loop;
end Remove_Dead_Instance;
 




[Ada] Tune discovery of No_Elaboration_Code restriction

2021-07-07 Thread Pierre-Marie de Rodat
When discovering violation of the No_Elaboration_Code restriction it is
enough to find one unit that violates it; we don't need to examine all
units.

Code cleanup; semantics is unaffected.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* lib-writ.adb (Write_ALI): Exit from loop after seeing first
unit that violates No_Elaboration_Code restriction.

diff --git a/gcc/ada/lib-writ.adb b/gcc/ada/lib-writ.adb
--- a/gcc/ada/lib-writ.adb
+++ b/gcc/ada/lib-writ.adb
@@ -1256,6 +1256,7 @@ package body Lib.Writ is
  if Units.Table (Unit).Generate_Code or else Unit = Main_Unit then
 if not Has_No_Elaboration_Code (Cunit (Unit)) then
Main_Restrictions.Violated (No_Elaboration_Code) := True;
+   exit;
 end if;
  end if;
   end loop;




[Ada] Add socket options to control keepalive on TCP connection

2021-07-07 Thread Pierre-Marie de Rodat
This adds socket options that are needed to control the keepalive status
of TCP connections. The new options are Keep_Alive_Count,
Keep_Alive_Idle, and Keep_Alive_Interval.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* libgnat/g-socket.ads (Option_Name): Add Keep_Alive_Count,
Keep_Alive_Idle, and Keep_Alive_Interval items to enumeration.
(Option_Type): Add Keep_Alive_Count, Keep_Alive_Idle, and
Keep_Alive_Interval alternatives to the case of discriminated
record.
* libgnat/g-socket.adb (Options): Add Keep_Alive_Count,
Keep_Alive_Idle, and Keep_Alive_Interval to items enumerator to
OS constant converter.
(Set_Socket_Option): Process Keep_Alive_Count, Keep_Alive_Idle,
and Keep_Alive_Interval socket options.
(Get_Socket_Option): Idem.

diff --git a/gcc/ada/libgnat/g-socket.adb b/gcc/ada/libgnat/g-socket.adb
--- a/gcc/ada/libgnat/g-socket.adb
+++ b/gcc/ada/libgnat/g-socket.adb
@@ -96,6 +96,9 @@ package body GNAT.Sockets is
 
Options : constant array (Specific_Option_Name) of C.int :=
(Keep_Alive  => SOSC.SO_KEEPALIVE,
+Keep_Alive_Count=> SOSC.TCP_KEEPCNT,
+Keep_Alive_Idle => SOSC.TCP_KEEPIDLE,
+Keep_Alive_Interval => SOSC.TCP_KEEPINTVL,
 Reuse_Address   => SOSC.SO_REUSEADDR,
 Broadcast   => SOSC.SO_BROADCAST,
 Send_Buffer => SOSC.SO_SNDBUF,
@@ -1442,6 +1445,9 @@ package body GNAT.Sockets is
 | Error
 | Generic_Option
 | Keep_Alive
+| Keep_Alive_Count
+| Keep_Alive_Idle
+| Keep_Alive_Interval
 | Multicast_If_V4
 | Multicast_If_V6
 | Multicast_Loop_V4
@@ -1511,6 +1517,15 @@ package body GNAT.Sockets is
  =>
 Opt.Enabled := (V4 /= 0);
 
+ when Keep_Alive_Count =>
+Opt.Count := Natural (V4);
+
+ when Keep_Alive_Idle =>
+Opt.Idle_Seconds := Natural (V4);
+
+ when Keep_Alive_Interval =>
+Opt.Interval_Seconds := Natural (V4);
+
  when Busy_Polling =>
 Opt.Microseconds := Natural (V4);
 
@@ -2620,6 +2635,21 @@ package body GNAT.Sockets is
 Len := V4'Size / 8;
 Add := V4'Address;
 
+ when Keep_Alive_Count =>
+V4  := C.int (Option.Count);
+Len := V4'Size / 8;
+Add := V4'Address;
+
+ when Keep_Alive_Idle =>
+V4  := C.int (Option.Idle_Seconds);
+Len := V4'Size / 8;
+Add := V4'Address;
+
+ when Keep_Alive_Interval =>
+V4  := C.int (Option.Interval_Seconds);
+Len := V4'Size / 8;
+Add := V4'Address;
+
  when Busy_Polling =>
 V4  := C.int (Option.Microseconds);
 Len := V4'Size / 8;


diff --git a/gcc/ada/libgnat/g-socket.ads b/gcc/ada/libgnat/g-socket.ads
--- a/gcc/ada/libgnat/g-socket.ads
+++ b/gcc/ada/libgnat/g-socket.ads
@@ -845,11 +845,20 @@ package GNAT.Sockets is
   -- IP_Protocol_For_TCP_Level --
   ---
 
-  No_Delay, -- TCP_NODELAY
+  No_Delay,-- TCP_NODELAY
   --  Disable the Nagle algorithm. This means that output buffer content
   --  is always sent as soon as possible, even if there is only a small
   --  amount of data.
 
+  Keep_Alive_Count,-- TCP_KEEPCNT
+  --  Maximum number of keepalive probes
+
+  Keep_Alive_Idle, -- TCP_KEEPIDLE
+  --  Idle time before TCP starts sending keepalive probes
+
+  Keep_Alive_Interval, -- TCP_KEEPINTVL
+  --  Time between individual keepalive probes
+
   --
   -- IP_Protocol_For_IP_Level --
   --
@@ -923,26 +932,35 @@ package GNAT.Sockets is
 Enabled : Boolean;
 
 case Name is
-   when Linger=>
+   when Linger =>
   Seconds : Natural;
-   when others=>
+   when others =>
   null;
 end case;
 
- when Busy_Polling=>
+ when Keep_Alive_Count=>
+Count : Natural;
+
+ when Keep_Alive_Idle =>
+Idle_Seconds : Natural;
+
+ when Keep_Alive_Interval =>
+Interval_Seconds : Natural;
+
+ when Busy_Polling=>
 Microseconds : Natural;
 
- when Send_Buffer |
-  Receive_Buffer  =>
+ when Send_Buffer |
+  Receive_Buffer  =>
 Size : Natural;
 
- when Error   =>
+ when Error   =>
 Error : Error_Type;
 
- when Add_Membership_V4  |
-  Add_Membership_V6  |
-  Drop_Membership_V4 |
-  Drop_Membership_V6 =>
+ wh

[Ada] Simplify handling of Generate_Code flag for compilation units

2021-07-07 Thread Pierre-Marie de Rodat
There are three kinds of units that require code generation: the main
unit, its corresponding spec and generic instances needed by the main
unit.

Previously the main unit and its corresponding spec were flagged as
requiring code generation just before calling the backend, while
instance units were flagged while they were created, which was
inconsistent.

Now all of them are flagged as soon as they are created, which both
appears to be simpler and makes the Generate_Code flag valid all the
time.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* gnat1drv.adb (Gnat1drv): Remove flagging of main unit and its
corresponding spec as requiring code generation; now the flags
are set much earlier.
* lib-load.adb (Load_Main_Source): Set Generate_Code flag on the
main unit source.
(Make_Instance_Unit): Copy Generate_Code flag from the main unit
to instance units.
* lib-writ.adb (Write_ALI): Remove redundant condition;
Generate_Code flag is always set for the main unit.
* par-load.adb (Load): Set Generate_Code flag on the main unit's
corresponding spec, if any.

diff --git a/gcc/ada/gnat1drv.adb b/gcc/ada/gnat1drv.adb
--- a/gcc/ada/gnat1drv.adb
+++ b/gcc/ada/gnat1drv.adb
@@ -1287,29 +1287,6 @@ begin
  Exit_Program (E_Errors);
   end if;
 
-  --  Set Generate_Code on main unit and its spec. We do this even if are
-  --  not generating code, since Lib-Writ uses this to determine which
-  --  units get written in the ali file.
-
-  Set_Generate_Code (Main_Unit);
-
-  --  If we have a corresponding spec, and it comes from source or it is
-  --  not a generated spec for a child subprogram body, then we need object
-  --  code for the spec unit as well.
-
-  if Nkind (Unit (Main_Unit_Node)) in N_Unit_Body
-and then not Acts_As_Spec (Main_Unit_Node)
-  then
- if Nkind (Unit (Main_Unit_Node)) = N_Subprogram_Body
-   and then not Comes_From_Source (Library_Unit (Main_Unit_Node))
- then
-null;
- else
-Set_Generate_Code
-  (Get_Cunit_Unit_Number (Library_Unit (Main_Unit_Node)));
- end if;
-  end if;
-
   --  Case of no code required to be generated, exit indicating no error
 
   if Original_Operating_Mode = Check_Syntax then


diff --git a/gcc/ada/lib-load.adb b/gcc/ada/lib-load.adb
--- a/gcc/ada/lib-load.adb
+++ b/gcc/ada/lib-load.adb
@@ -364,7 +364,7 @@ package body Lib.Load is
 Error_Location => No_Location,
 Expected_Unit  => No_Unit_Name,
 Fatal_Error=> None,
-Generate_Code  => False,
+Generate_Code  => True,
 Has_RACW   => False,
 Filler => False,
 Ident_String   => Empty,
@@ -964,13 +964,12 @@ package body Lib.Load is
   Units.Increment_Last;
 
   if In_Main then
- Units.Table (Units.Last)   := Units.Table (Main_Unit);
- Units.Table (Units.Last).Cunit := Library_Unit (N);
- Units.Table (Units.Last).Generate_Code := True;
+ Units.Table (Units.Last):= Units.Table (Main_Unit);
+ Units.Table (Units.Last).Cunit  := Library_Unit (N);
  Init_Unit_Name (Units.Last, Unit_Name (Main_Unit));
 
- Units.Table (Main_Unit).Cunit  := N;
- Units.Table (Main_Unit).Version:= Source_Checksum (Sind);
+ Units.Table (Main_Unit).Cunit   := N;
+ Units.Table (Main_Unit).Version := Source_Checksum (Sind);
  Init_Unit_Name (Main_Unit,
Get_Body_Name
  (Unit_Name (Get_Cunit_Unit_Number (Library_Unit (N);


diff --git a/gcc/ada/lib-writ.adb b/gcc/ada/lib-writ.adb
--- a/gcc/ada/lib-writ.adb
+++ b/gcc/ada/lib-writ.adb
@@ -1253,7 +1253,7 @@ package body Lib.Writ is
   --  for which we have generated code
 
   for Unit in Units.First .. Last_Unit loop
- if Units.Table (Unit).Generate_Code or else Unit = Main_Unit then
+ if Units.Table (Unit).Generate_Code then
 if not Has_No_Elaboration_Code (Cunit (Unit)) then
Main_Restrictions.Violated (No_Elaboration_Code) := True;
exit;


diff --git a/gcc/ada/par-load.adb b/gcc/ada/par-load.adb
--- a/gcc/ada/par-load.adb
+++ b/gcc/ada/par-load.adb
@@ -265,6 +265,12 @@ begin
  --  and this is also where we generate the SCO's for this spec.
 
  if Cur_Unum = Main_Unit then
+
+--  We generate code for the main unit body, so we need to generate
+--  code for its spec too.
+
+Set_Generate_Code (Unum, True);
+
 Main_Unit_Entity := Cunit_Entity (Unum);
 
 if Generate_SCO then




[Ada] Replace chopped string copy with renaming

2021-07-07 Thread Pierre-Marie de Rodat
Replace a local string copy with a renaming, which both makes the code
shorter to read and should be marginally faster to execute. This is a code
cleanup related to loading of compilation units only; semantics is unaffected.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* par.adb (Par): A local Name variable is now a renaming of a
constant slice.

diff --git a/gcc/ada/par.adb b/gcc/ada/par.adb
--- a/gcc/ada/par.adb
+++ b/gcc/ada/par.adb
@@ -1650,14 +1650,12 @@ begin
  Uname : constant String :=
Get_Name_String
  (Unit_Name (Current_Source_Unit));
- Name  : String (1 .. Uname'Length - 2);
-
-  begin
+ Name  : String renames
+   Uname (Uname'First .. Uname'Last - 2);
  --  Because Unit_Name includes "%s"/"%b", we need to strip
  --  the last two characters to get the real unit name.
 
- Name := Uname (Uname'First .. Uname'Last - 2);
-
+  begin
  if Name = "ada" or else
 Name = "interfaces"  or else
 Name = "system"




[Ada] Fix precondition of Cot for code analyzers

2021-07-07 Thread Pierre-Marie de Rodat
The precondition of Cot in Ada.Numerics.Generic_Elementary_Functions is
not meant for execution (as enforced by the Assertion_Policy at the top
of the file) but for analysis only. A conjunct in the precondition of
Cot applied to two arguments (with a Cycle value) was incorrect, now
fixed.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* libgnat/a-ngelfu.ads (Cot): Fix precondition.

diff --git a/gcc/ada/libgnat/a-ngelfu.ads b/gcc/ada/libgnat/a-ngelfu.ads
--- a/gcc/ada/libgnat/a-ngelfu.ads
+++ b/gcc/ada/libgnat/a-ngelfu.ads
@@ -126,7 +126,7 @@ is
  Pre => Cycle > 0.0
and then X /= 0.0
and then Float_Type'Base'Remainder (X, Cycle) /= 0.0
-   and then abs Float_Type'Base'Remainder (X, Cycle) = 0.5 * Cycle;
+   and then abs Float_Type'Base'Remainder (X, Cycle) /= 0.5 * Cycle;
 
function Arcsin (X : Float_Type'Base) return Float_Type'Base with
  Pre  => abs X <= 1.0,




[Ada] Linker_Section_Pragma cleanup

2021-07-07 Thread Pierre-Marie de Rodat
Remove Linker_Section_Pragma field from Record_Field_Kind.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* gen_il-gen-gen_entities.adb: Remove Linker_Section_Pragma
field from Record_Field_Kind.  Minor comment improvement.

diff --git a/gcc/ada/gen_il-gen-gen_entities.adb b/gcc/ada/gen_il-gen-gen_entities.adb
--- a/gcc/ada/gen_il-gen-gen_entities.adb
+++ b/gcc/ada/gen_il-gen-gen_entities.adb
@@ -315,15 +315,14 @@ begin -- Gen_IL.Gen.Gen_Entities
 Sm (Entry_Formal, Node_Id),
 Sm (Esize, Uint),
 Sm (Interface_Name, Node_Id),
-Sm (Linker_Section_Pragma, Node_Id),
 Sm (Normalized_First_Bit, Uint),
 Sm (Normalized_Position, Uint),
 Sm (Normalized_Position_Max, Uint),
 Sm (Original_Record_Component, Node_Id)));
 
Cc (E_Component, Record_Field_Kind,
-   --  Components of a record declaration, private declarations of
-   --  protected objects.
+   --  Components (other than discriminants) of a record declaration,
+   --  private declarations of protected objects.
(Sm (Discriminant_Checking_Func, Node_Id),
 Sm (DT_Entry_Count, Uint,
 Pre => "Is_Tag (N)"),




[Ada] Replace low-level membership tests with Is_Private_Type

2021-07-07 Thread Pierre-Marie de Rodat
Code cleanup; semantics is unaffected.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_ch3.adb, exp_ch6.adb, sem_ch6.adb: Replace Ekind
membership test in Private_Kind with a call to Is_Private_Type.

diff --git a/gcc/ada/exp_ch3.adb b/gcc/ada/exp_ch3.adb
--- a/gcc/ada/exp_ch3.adb
+++ b/gcc/ada/exp_ch3.adb
@@ -6000,7 +6000,7 @@ package body Exp_Ch3 is
   --  The parent type is private then we need to inherit any TSS operations
   --  from the full view.
 
-  if Ekind (Par_Id) in Private_Kind
+  if Is_Private_Type (Par_Id)
 and then Present (Full_View (Par_Id))
   then
  Par_Id := Base_Type (Full_View (Par_Id));
@@ -6036,7 +6036,7 @@ package body Exp_Ch3 is
 --  If the derived type itself is private with a full view, then
 --  associate the full view with the inherited TSS_Elist as well.
 
-if Ekind (B_Id) in Private_Kind
+if Is_Private_Type (B_Id)
   and then Present (Full_View (B_Id))
 then
Ensure_Freeze_Node (Base_Type (Full_View (B_Id)));


diff --git a/gcc/ada/exp_ch6.adb b/gcc/ada/exp_ch6.adb
--- a/gcc/ada/exp_ch6.adb
+++ b/gcc/ada/exp_ch6.adb
@@ -3761,7 +3761,7 @@ package body Exp_Ch6 is
  --  because the object has underlying discriminants with defaults.
 
  if Present (Extra_Constrained (Formal)) then
-if Ekind (Etype (Prev)) in Private_Kind
+if Is_Private_Type (Etype (Prev))
   and then not Has_Discriminants (Base_Type (Etype (Prev)))
 then
Add_Extra_Actual


diff --git a/gcc/ada/sem_ch6.adb b/gcc/ada/sem_ch6.adb
--- a/gcc/ada/sem_ch6.adb
+++ b/gcc/ada/sem_ch6.adb
@@ -8901,7 +8901,7 @@ package body Sem_Ch6 is
 end if;
 
 if not Has_Discriminants (Formal_Type)
-  and then Ekind (Formal_Type) in Private_Kind
+  and then Is_Private_Type (Formal_Type)
   and then Present (Underlying_Type (Formal_Type))
 then
Formal_Type := Underlying_Type (Formal_Type);
@@ -11253,7 +11253,7 @@ package body Sem_Ch6 is
 --  If the entity is a private type, then it must be declared in a
 --  visible part.
 
-if Ekind (T) in Private_Kind then
+if Is_Private_Type (T) then
return True;
 
 elsif Is_Type (T) and then Has_Private_Declaration (T) then




[Ada] Simplify code by reusing List_Length

2021-07-07 Thread Pierre-Marie de Rodat
Code cleanup; semantics is unaffected.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_disp.adb (CPP_Num_Prims): Reuse List_Length.

diff --git a/gcc/ada/exp_disp.adb b/gcc/ada/exp_disp.adb
--- a/gcc/ada/exp_disp.adb
+++ b/gcc/ada/exp_disp.adb
@@ -588,19 +588,7 @@ package body Exp_Disp is
  --  Otherwise, count the primitives of the enclosing CPP type
 
  else
-declare
-   Count : Nat := 0;
-   Elmt  : Elmt_Id;
-
-begin
-   Elmt := First_Elmt (Primitive_Operations (CPP_Typ));
-   while Present (Elmt) loop
-  Count := Count + 1;
-  Next_Elmt (Elmt);
-   end loop;
-
-   return Count;
-end;
+return List_Length (Primitive_Operations (CPP_Typ));
  end if;
   end if;
end CPP_Num_Prims;




[Ada] Reduce scope of local variables

2021-07-07 Thread Pierre-Marie de Rodat
Code cleanup; semantics is unaffected.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch6.adb (Check_For_Primitive_Subprogram): Move
declarations of local variables after nested subprogram bodies.

diff --git a/gcc/ada/sem_ch6.adb b/gcc/ada/sem_ch6.adb
--- a/gcc/ada/sem_ch6.adb
+++ b/gcc/ada/sem_ch6.adb
@@ -11018,10 +11018,6 @@ package body Sem_Ch6 is
 (Is_Primitive  : out Boolean;
  Is_Overriding : Boolean := False)
   is
- Formal : Entity_Id;
- F_Typ  : Entity_Id;
- B_Typ  : Entity_Id;
-
  procedure Add_Or_Replace_Untagged_Primitive (Typ : Entity_Id);
  --  Either add the new subprogram to the list of primitives for
  --  untagged type Typ, or if it overrides a primitive of Typ, then
@@ -11270,6 +11266,12 @@ package body Sem_Ch6 is
 end if;
  end Visible_Part_Type;
 
+ --  Local variables
+
+ Formal : Entity_Id;
+ F_Typ  : Entity_Id;
+ B_Typ  : Entity_Id;
+
   --  Start of processing for Check_For_Primitive_Subprogram
 
   begin




[Ada] Simplify handling of sure errors in GNATprove mode

2021-07-07 Thread Pierre-Marie de Rodat
In cases where it is known statically that an exception will be raised,
the frontend inserts a node of kind N_Raise_xxx_Error. This node kind
was previously not supported in GNATprove, which required special
handling for these cases in GNATprove mode. This is not needed anymore.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* checks.adb (Apply_Scalar_Range_Check): Remove special case for
GNATprove mode.
* sem_res.adb (Resolve_Arithmetic_Op): Same.
* sem_util.adb (Apply_Compile_Time_Constraint_Error): Same.

diff --git a/gcc/ada/checks.adb b/gcc/ada/checks.adb
--- a/gcc/ada/checks.adb
+++ b/gcc/ada/checks.adb
@@ -3323,13 +3323,6 @@ package body Checks is
 
  Bad_Value (Warn => SPARK_Mode = On);
 
- --  In GNATprove mode, we enable the range check so that
- --  GNATprove will issue a message if it cannot be proved.
-
- if GNATprove_Mode then
-Enable_Range_Check (Expr);
- end if;
-
  return;
   end if;
 


diff --git a/gcc/ada/sem_res.adb b/gcc/ada/sem_res.adb
--- a/gcc/ada/sem_res.adb
+++ b/gcc/ada/sem_res.adb
@@ -6151,13 +6151,6 @@ package body Sem_Res is
  raise Program_Error;
end case;
 
-   --  In GNATprove mode, we enable the division check so that
-   --  GNATprove will issue a message if it cannot be proved.
-
-   if GNATprove_Mode then
-  Activate_Division_Check (N);
-   end if;
-
 --  Otherwise just set the flag to check at run time
 
 else


diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb
--- a/gcc/ada/sem_util.adb
+++ b/gcc/ada/sem_util.adb
@@ -1533,17 +1533,6 @@ package body Sem_Util is
   Discard_Node
 (Compile_Time_Constraint_Error (N, Msg, Ent, Loc, Warn => Warn));
 
-  --  In GNATprove mode, do not replace the node with an exception raised.
-  --  In such a case, either the call to Compile_Time_Constraint_Error
-  --  issues an error which stops analysis, or it issues a warning in
-  --  a few cases where a suitable check flag is set for GNATprove to
-  --  generate a check message.
-
-  if GNATprove_Mode then
- Set_Raises_Constraint_Error (N);
- return;
-  end if;
-
   --  Now we replace the node by an N_Raise_Constraint_Error node
   --  This does not need reanalyzing, so set it as analyzed now.
 




[Ada] Unchecked_Convert_To: set Parent

2021-07-07 Thread Pierre-Marie de Rodat
A previous change to Unchecked_Convert_To removed the setting of the
Parent of the new node, because it was thought to be unnecessary.
However, in rare cases, it is necessary because for example
Remove_Side_Effects is called on the new node before attaching it
to the tree.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* tbuild.adb (Unchecked_Convert_To): Set the Parent of the new
node to the Parent of the old node.
* tbuild.ads (Unchecked_Convert_To): Document differences
between Convert_To and Unchecked_Convert_To. The previous
documentation claimed they are identical except for the
uncheckedness of the conversion.

diff --git a/gcc/ada/tbuild.adb b/gcc/ada/tbuild.adb
--- a/gcc/ada/tbuild.adb
+++ b/gcc/ada/tbuild.adb
@@ -919,10 +919,15 @@ package body Tbuild is
   --  All other cases
 
   else
- Result :=
-   Make_Unchecked_Type_Conversion (Loc,
- Subtype_Mark => New_Occurrence_Of (Typ, Loc),
- Expression   => Relocate_Node (Expr));
+ declare
+Expr_Parent : constant Node_Id := Parent (Expr);
+ begin
+Result :=
+  Make_Unchecked_Type_Conversion (Loc,
+Subtype_Mark => New_Occurrence_Of (Typ, Loc),
+Expression   => Relocate_Node (Expr));
+Set_Parent (Result, Expr_Parent);
+ end;
   end if;
 
   Set_Etype (Result, Typ);


diff --git a/gcc/ada/tbuild.ads b/gcc/ada/tbuild.ads
--- a/gcc/ada/tbuild.ads
+++ b/gcc/ada/tbuild.ads
@@ -340,7 +340,10 @@ package Tbuild is
  (Typ  : Entity_Id;
   Expr : Node_Id) return Node_Id;
--  Like Convert_To, but if a conversion is actually needed, constructs an
-   --  N_Unchecked_Type_Conversion node to do the required conversion.
+   --  N_Unchecked_Type_Conversion node to do the required conversion. Unlike
+   --  Convert_To, a new node is not required if Expr is already of the correct
+   --  BASE type, and if a new node is created, the Parent of Expr is copied to
+   --  it.
 
-
-- Subprograms for Use by Gnat1drv --




[Ada] Timeout correction on Get_Socket_Option

2021-07-07 Thread Pierre-Marie de Rodat
Set_Socket_Option shifts the timeout by -500 ms on old Windows versions,
but Get_Socket_Option added 500 ms to the timeout on all Windows versions.
This commit fixes that: Get_Socket_Option now adds 500 ms only on old
Windows versions.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* libgnat/g-socket.adb (Get_Socket_Option): Add 500ms only when
Minus_500ms_Windows_Timeout is True.
(Set_Socket_Option): Use "* 1000" instead of "/ 0.001" to
convert to milliseconds.

diff --git a/gcc/ada/libgnat/g-socket.adb b/gcc/ada/libgnat/g-socket.adb
--- a/gcc/ada/libgnat/g-socket.adb
+++ b/gcc/ada/libgnat/g-socket.adb
@@ -1570,14 +1570,18 @@ package body GNAT.Sockets is
 | Send_Timeout
  =>
 if Is_Windows then
-
-   --  Timeout is in milliseconds, actual value is 500 ms +
-   --  returned value (unless it is 0).
-
if U4 = 0 then
   Opt.Timeout := 0.0;
+
else
-  Opt.Timeout :=  Duration (U4) / 1000 + 0.500;
+  if Minus_500ms_Windows_Timeout then
+ --  Timeout is in milliseconds, actual value is 500 ms +
+ --  returned value (unless it is 0).
+
+ U4 := U4 + 500;
+  end if;
+
+  Opt.Timeout := Duration (U4) / 1000;
end if;
 
 else
@@ -2724,7 +2728,7 @@ package body GNAT.Sockets is
Len := U4'Size / 8;
Add := U4'Address;
 
-   U4 := C.unsigned (Option.Timeout / 0.001);
+   U4 := C.unsigned (Option.Timeout * 1000);
 
if Option.Timeout > 0.0 and then U4 = 0 then
   --  Avoid round to zero. Zero timeout mean unlimited




[Ada] Improve interactions between DSA and Put_Image routines for tagged types

2021-07-07 Thread Pierre-Marie de Rodat
Back out of an overly aggressive workaround for compilation problems
associated with a Put_Image routine for a tagged type in a Remote_Types
package and try a different (hopefully better) approach that is more
consistent with how other predefined primitives are treated.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_dist.adb (Add_RACW_Primitive_Declarations_And_Bodies): Add
TSS_Put_Image to list of predefined primitives that need special
treatment.
(Build_General_Calling_Stubs, Build_Subprogram_Receiving_Stubs):
Remove previous hack for dealing with TSS_Put_Image procedures.

diff --git a/gcc/ada/exp_dist.adb b/gcc/ada/exp_dist.adb
--- a/gcc/ada/exp_dist.adb
+++ b/gcc/ada/exp_dist.adb
@@ -1424,6 +1424,7 @@ package body Exp_Dist is
   and then Chars (Current_Primitive) /= Name_uAlignment
   and then not
 (Is_TSS (Current_Primitive, TSS_Deep_Finalize) or else
+ Is_TSS (Current_Primitive, TSS_Put_Image) or else
  Is_TSS (Current_Primitive, TSS_Stream_Input)  or else
  Is_TSS (Current_Primitive, TSS_Stream_Output) or else
  Is_TSS (Current_Primitive, TSS_Stream_Read)   or else
@@ -4211,14 +4212,6 @@ package body Exp_Dist is
  --  Used only for the PolyORB case
 
   begin
- --  workaround for later failures in Exp_Util.Find_Prim_Op
- if Is_TSS (Defining_Unit_Name (Spec), TSS_Put_Image) then
-Append_To (Statements,
-   Make_Raise_Program_Error (Loc,
- Reason => PE_Stream_Operation_Not_Allowed));
-return;
- end if;
-
  --  The general form of a calling stub for a given subprogram is:
 
  --procedure X (...) is P : constant Partition_ID :=
@@ -4734,11 +4727,11 @@ package body Exp_Dist is
  --  Formal parameter for receiving stubs: a descriptor for an incoming
  --  request.
 
- Decls : List_Id := New_List;
+ Decls : constant List_Id := New_List;
  --  All the parameters will get declared before calling the real
  --  subprograms. Also the out parameters will be declared.
 
- Statements : List_Id := New_List;
+ Statements : constant List_Id := New_List;
 
  Extra_Formal_Statements : constant List_Id := New_List;
  --  Statements concerning extra formal parameters
@@ -5173,19 +5166,6 @@ package body Exp_Dist is
  Parameter_Type  =>
New_Occurrence_Of (RTE (RE_Request_Access), Loc;
 
- --  workaround for later failures in Exp_Util.Find_Prim_Op
- if Is_TSS (Defining_Unit_Name (Specification (Vis_Decl)),
-TSS_Put_Image)
- then
---  drop everything on the floor
-Decls := New_List;
-Statements := New_List;
-Excep_Handlers := New_List;
-Append_To (Statements,
-   Make_Raise_Program_Error (Loc,
- Reason => PE_Stream_Operation_Not_Allowed));
- end if;
-
  return
Make_Subprogram_Body (Loc,
  Specification  => Subp_Spec,




[Ada] Fix bugs in Value_Size clauses and refactor

2021-07-07 Thread Pierre-Marie de Rodat
Size and Value_Size clauses are documented to be the same, except that
Value_Size is allowed for nonfirst subtypes, and Size is allowed for
objects. This was far from true, which caused bugs such as ignoring
Value_Size for access types, in cases where a Size clause would trigger
the use of thin pointers, and this patch fixes that.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch13.adb (Analyze_Attribute_Definition_Clause): Combine
processing of Size and Value_Size clauses. Ensure that
Value_Size is treated the same as Size, in the cases where both
are allowed (i.e. the prefix denotes a first subtype).  Misc
cleanup.
* einfo-utils.adb (Init_Size): Add assertions.
(Size_Clause): Return a Value_Size clause if present, instead of
just looking for a Size clause.
* einfo.ads (Has_Size_Clause, Size_Clause): Change documentation
to include Value_Size.
* sem_ch13.ads, layout.ads, layout.adb: Comment modifications.

diff --git a/gcc/ada/einfo-utils.adb b/gcc/ada/einfo-utils.adb
--- a/gcc/ada/einfo-utils.adb
+++ b/gcc/ada/einfo-utils.adb
@@ -481,7 +481,13 @@ package body Einfo.Utils is
 
procedure Init_Size (Id : E; V : Int) is
begin
-  pragma Assert (not Is_Object (Id));
+  pragma Assert (Is_Type (Id));
+  pragma Assert
+(not Known_Esize (Id) or else Esize (Id) = V);
+  pragma Assert
+(RM_Size (Id) = No_Uint
+   or else RM_Size (Id) = Uint_0
+   or else RM_Size (Id) = V);
   Set_Esize (Id, UI_From_Int (V));
   Set_RM_Size (Id, UI_From_Int (V));
end Init_Size;
@@ -492,7 +498,7 @@ package body Einfo.Utils is
 
procedure Init_Size_Align (Id : E) is
begin
-  pragma Assert (not Is_Object (Id));
+  pragma Assert (Ekind (Id) in Type_Kind | E_Void);
   Set_Esize (Id, Uint_0);
   Set_RM_Size (Id, Uint_0);
   Set_Alignment (Id, Uint_0);
@@ -2927,8 +2933,13 @@ package body Einfo.Utils is
-
 
function Size_Clause (Id : E) return N is
+  Result : N := Get_Attribute_Definition_Clause (Id, Attribute_Size);
begin
-  return Get_Attribute_Definition_Clause (Id, Attribute_Size);
+  if No (Result) then
+ Result := Get_Attribute_Definition_Clause (Id, Attribute_Value_Size);
+  end if;
+
+  return Result;
end Size_Clause;
 



diff --git a/gcc/ada/einfo.ads b/gcc/ada/einfo.ads
--- a/gcc/ada/einfo.ads
+++ b/gcc/ada/einfo.ads
@@ -2015,11 +2015,11 @@ package Einfo is
 --   which at least one of the shift operators is defined.
 
 --Has_Size_Clause
---   Defined in entities for types and objects. Set if a size clause is
---   defined for the entity. Used to prevent multiple Size clauses for a
---   given entity. Note that it is always initially cleared for a derived
---   type, even though the Size for such a type is inherited from a Size
---   clause given for the parent type.
+--   Defined in entities for types and objects. Set if a size or value size
+--   clause is defined for the entity. Used to prevent multiple clauses
+--   for a given entity. Note that it is always initially cleared for a
+--   derived type, even though the Size or Value_Size clause for such a
+--   type might be inherited from an ancestor type.
 
 --Has_Small_Clause
 --   Defined in ordinary fixed point types (but not subtypes). Indicates
@@ -4321,13 +4321,12 @@ package Einfo is
 --   suppress this code if a subsequent address clause is encountered.
 
 --Size_Clause (synthesized)
---   Applies to all entities. If a size clause is present in the rep
---   item chain for an entity then the attribute definition clause node
---   for the size clause is returned. Otherwise Size_Clause returns Empty
---   if no item is present. Usually this is only meaningful if the flag
---   Has_Size_Clause is set. This is because when the representation item
---   chain is copied for a derived type, it can inherit a size clause that
---   is not applicable to the entity.
+--   Applies to all entities. If a size or value size clause is present in
+--   the rep item chain for an entity then that attribute definition clause
+--   is returned. Otherwise Size_Clause returns Empty. Usually this is only
+--   meaningful if the flag Has_Size_Clause is set. This is because when
+--   the representation item chain is copied for a derived type, it can
+--   inherit a size clause that is not applicable to the entity.
 
 --Size_Depends_On_Discriminant
 --   Defined in all entities for types and subtypes. Indicates that the


diff --git a/gcc/ada/layout.adb b/gcc/ada/layout.adb
--- a/gcc/ada/layout.adb
+++ b/gcc/ada/layout.adb
@@ -270,15 +270,15 @@ package body Layout is
 Desig_Type := Non_Limited_View (Designated_Type (E));
  end if;
 
- --  If Esize already set (

[Ada] Replace obsolete calls that use global name buffer

2021-07-07 Thread Pierre-Marie de Rodat
Code cleanup related to loading of compilation units; behaviour is
unaffected.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* uname.adb (Get_Body_Name, Get_Parent_Body_Name,
Get_Parent_Spec_Name, Get_Spec_Name, Is_Child_Name,
Is_Body_Name, Is_Spec_Name, Name_To_Unit_Name): Use a local
instead of the global buffer.

diff --git a/gcc/ada/uname.adb b/gcc/ada/uname.adb
--- a/gcc/ada/uname.adb
+++ b/gcc/ada/uname.adb
@@ -47,15 +47,18 @@ package body Uname is
---
 
function Get_Body_Name (N : Unit_Name_Type) return Unit_Name_Type is
+  Buffer : Bounded_String;
begin
-  Get_Name_String (N);
+  Append (Buffer, N);
 
-  pragma Assert (Name_Len > 2
-   and then Name_Buffer (Name_Len - 1) = '%'
-   and then Name_Buffer (Name_Len) = 's');
+  pragma Assert
+(Buffer.Length > 2
+ and then Buffer.Chars (Buffer.Length - 1) = '%'
+ and then Buffer.Chars (Buffer.Length) = 's');
 
-  Name_Buffer (Name_Len) := 'b';
-  return Name_Find;
+  Buffer.Chars (Buffer.Length) := 'b';
+
+  return Name_Find (Buffer);
end Get_Body_Name;
 
---
@@ -111,19 +114,19 @@ package body Uname is
--
 
function Get_Parent_Body_Name (N : Unit_Name_Type) return Unit_Name_Type is
+  Buffer : Bounded_String;
begin
-  Get_Name_String (N);
+  Append (Buffer, N);
 
-  while Name_Buffer (Name_Len) /= '.' loop
- pragma Assert (Name_Len > 1); -- not a child or subunit name
- Name_Len := Name_Len - 1;
+  while Buffer.Chars (Buffer.Length) /= '.' loop
+ pragma Assert (Buffer.Length > 1); -- not a child or subunit name
+ Buffer.Length := Buffer.Length - 1;
   end loop;
 
-  Name_Buffer (Name_Len) := '%';
-  Name_Len := Name_Len + 1;
-  Name_Buffer (Name_Len) := 'b';
-  return Name_Find;
+  Buffer.Chars (Buffer.Length) := '%';
+  Append (Buffer, 'b');
 
+  return Name_Find (Buffer);
end Get_Parent_Body_Name;
 
--
@@ -131,22 +134,22 @@ package body Uname is
--
 
function Get_Parent_Spec_Name (N : Unit_Name_Type) return Unit_Name_Type is
+  Buffer : Bounded_String;
begin
-  Get_Name_String (N);
+  Append (Buffer, N);
 
-  while Name_Buffer (Name_Len) /= '.' loop
- if Name_Len = 1 then
+  while Buffer.Chars (Buffer.Length) /= '.' loop
+ if Buffer.Length = 1 then
 return No_Unit_Name;
  else
-Name_Len := Name_Len - 1;
+Buffer.Length := Buffer.Length - 1;
  end if;
   end loop;
 
-  Name_Buffer (Name_Len) := '%';
-  Name_Len := Name_Len + 1;
-  Name_Buffer (Name_Len) := 's';
-  return Name_Find;
+  Buffer.Chars (Buffer.Length) := '%';
+  Append (Buffer, 's');
 
+  return Name_Find (Buffer);
end Get_Parent_Spec_Name;
 
---
@@ -154,15 +157,18 @@ package body Uname is
---
 
function Get_Spec_Name (N : Unit_Name_Type) return Unit_Name_Type is
+  Buffer : Bounded_String;
begin
-  Get_Name_String (N);
+  Append (Buffer, N);
 
-  pragma Assert (Name_Len > 2
-   and then Name_Buffer (Name_Len - 1) = '%'
-   and then Name_Buffer (Name_Len) = 'b');
+  pragma Assert
+(Buffer.Length > 2
+ and then Buffer.Chars (Buffer.Length - 1) = '%'
+ and then Buffer.Chars (Buffer.Length) = 'b');
 
-  Name_Buffer (Name_Len) := 's';
-  return Name_Find;
+  Buffer.Chars (Buffer.Length) := 's';
+
+  return Name_Find (Buffer);
end Get_Spec_Name;
 
---
@@ -489,11 +495,12 @@ package body Uname is
--
 
function Is_Body_Name (N : Unit_Name_Type) return Boolean is
+  Buffer : Bounded_String;
begin
-  Get_Name_String (N);
-  return Name_Len > 2
-and then Name_Buffer (Name_Len - 1) = '%'
-and then Name_Buffer (Name_Len) = 'b';
+  Append (Buffer, N);
+  return Buffer.Length > 2
+and then Buffer.Chars (Buffer.Length - 1) = '%'
+and then Buffer.Chars (Buffer.Length) = 'b';
end Is_Body_Name;
 
---
@@ -501,17 +508,16 @@ package body Uname is
---
 
function Is_Child_Name (N : Unit_Name_Type) return Boolean is
-  J : Natural;
+  Buffer : Bounded_String;
 
begin
-  Get_Name_String (N);
-  J := Name_Len;
+  Append (Buffer, N);
 
-  while Name_Buffer (J) /= '.' loop
- if J = 1 then
+  while Buffer.Chars (Buffer.Length) /= '.' loop
+ if Buffer.Length = 1 then
 return False; -- not a child or subunit name
  else
-J := J - 1;
+Buffer.Length := Buffer.Length - 1;
  end if;
   end loop;
 
@@ -589,11 +595,12

[Ada] Use bounded string buffer in Get_Unit_Name

2021-07-07 Thread Pierre-Marie de Rodat
Code cleanup; semantics is unaffected.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* uname.adb (Get_Unit_Name): Simplify with a bounded string
buffer; also, this addresses a ??? comment about the max length
being exceeded.

diff --git a/gcc/ada/uname.adb b/gcc/ada/uname.adb
--- a/gcc/ada/uname.adb
+++ b/gcc/ada/uname.adb
@@ -177,13 +177,8 @@ package body Uname is
 
function Get_Unit_Name (N : Node_Id) return Unit_Name_Type is
 
-  Unit_Name_Buffer : String (1 .. Hostparm.Max_Name_Length);
-  --  Buffer used to build name of unit. Note that we cannot use the
-  --  Name_Buffer in package Name_Table because we use it to read
-  --  component names.
-
-  Unit_Name_Length : Natural := 0;
-  --  Length of name stored in Unit_Name_Buffer
+  Unit_Name_Buffer : Bounded_String;
+  --  Buffer used to build name of unit
 
   Node : Node_Id;
   --  Program unit node
@@ -206,9 +201,7 @@ package body Uname is
 
   procedure Add_Char (C : Character) is
   begin
- --  Should really check for max length exceeded here???
- Unit_Name_Length := Unit_Name_Length + 1;
- Unit_Name_Buffer (Unit_Name_Length) := C;
+ Append (Unit_Name_Buffer, C);
   end Add_Char;
 
   --
@@ -217,11 +210,7 @@ package body Uname is
 
   procedure Add_Name (Name : Name_Id) is
   begin
- Get_Name_String (Name);
-
- for J in 1 .. Name_Len loop
-Add_Char (Name_Buffer (J));
- end loop;
+ Append (Unit_Name_Buffer, Name);
   end Add_Name;
 
   ---
@@ -414,11 +403,7 @@ package body Uname is
 raise Program_Error;
   end case;
 
-  Name_Buffer (1 .. Unit_Name_Length) :=
-Unit_Name_Buffer (1 .. Unit_Name_Length);
-  Name_Len := Unit_Name_Length;
-  return Name_Find;
-
+  return Name_Find (Unit_Name_Buffer);
end Get_Unit_Name;
 
--




[Ada] Fix location of errors about volatile compatibility

2021-07-07 Thread Pierre-Marie de Rodat
Ada 2022 errors about volatile compatibility between generic actual and
formal types were emitted on type declaration; now they are emitted at
the actual type within the generic instance.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch12.adb (Check_Shared_Variable_Control_Aspects): Errors
emitted via Check_Volatility_Compatibility are now emitted at
Actual, just like other errors emitted by
Check_Shared_Variable_Control_Aspects.

diff --git a/gcc/ada/sem_ch12.adb b/gcc/ada/sem_ch12.adb
--- a/gcc/ada/sem_ch12.adb
+++ b/gcc/ada/sem_ch12.adb
@@ -12825,7 +12825,7 @@ package body Sem_Ch12 is
 Check_Volatility_Compatibility
   (Act_T, A_Gen_T,
"actual type", "its corresponding formal type",
-   Srcpos_Bearer => Act_T);
+   Srcpos_Bearer => Actual);
  end if;
   end Check_Shared_Variable_Control_Aspects;
 




[Ada] Implement new legality checks specified by AI12-0412

2021-07-07 Thread Pierre-Marie de Rodat
Ada 2022, in AI12-0412, specifies that certain uses of primitives of an
abstract type that have Pre'Class or Post'Class aspect are illegal when
an aspect is given with a nonstatic expression. Specifically, if the
primitive is nonabstract and has such aspects, it's illegal to make a
nondispatching call to it, as well as to apply 'Access to it or pass it
as an actual for a nonabstract formal subprogram.

Furthermore, by AI12-0170, as revised by AI12-0412, one can specify
class-wide Pre and Post with calls to abstract functions, where any
actuals given by a formal parameter of the primitive with the class-wide
aspect are reinterpreted using a "notional (nonabstract) type" for any
primitive of an abstract type, whether or not the primitive is abstract,
and part of these changes now allows such aspects for nonabstract
primitive cases that were formerly being rejected (previously GNAT was
only allowing this for null procedures).

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* freeze.adb (Check_Inherited_Conditions): Setting of Ekind,
LSP_Subprogram, and Is_Wrapper needs to happen for null
procedures as well as other wrapper cases, so the code is moved
from the else part in front of the if statement.  (Fixes a
latent bug encountered while working on this set of changes.)
* sem_attr.adb (Resolve_Attribute): Report an error for the case
of an Access attribute applied to a primitive of an abstract
type when the primitive has any nonstatic Pre'Class or
Post'Class expressions.
* sem_ch8.adb (Analyze_Subprogram_Renaming): Report an error for
the case of an actual subprogram associated with a nonabstract
formal subprogram when the actual is a primitive of an abstract
type and the primitive has any nonstatic Pre'Class or Post'Class
expressions.
* sem_disp.adb (Check_Dispatching_Context): Remove special
testing for null procedures, and replace it with a relaxed test
that avoids getting an error about illegal calls to abstract
subprograms in cases where RM 6.1.1(7/5) applies in
Pre/Post'Class aspects. Also, remove special test for
Postcondition, which seems to be unnecessary, update associated
comments, and fix a typo in one comment.
(Check_Dispatching_Call): Remove an unneeded return statement,
and report an error for the case of a nondispatching call to a
nonabstract subprogram of an abstract type where the subprogram
has nonstatic Pre/Post'Class aspects.
* sem_util.ads
(Is_Prim_Of_Abst_Type_With_Nonstatic_CW_Pre_Post): New function.
(In_Pre_Post_Condition): Add a flag formal Class_Wide_Only,
defaulted to False, for indicating whether the function should
only test for the node being within class-wide pre- and
postconditions.
* sem_util.adb
(Is_Prim_Of_Abst_Type_With_Nonstatic_CW_Pre_Post): New function
to determine whether a subprogram is a primitive of an abstract
type where the primitive has class-wide Pre/Post'Class aspects
specified with nonstatic expressions.
(In_Pre_Post_Condition): Extend testing to account for the new
formal Class_Wide_Only.
diff --git a/gcc/ada/freeze.adb b/gcc/ada/freeze.adb
--- a/gcc/ada/freeze.adb
+++ b/gcc/ada/freeze.adb
@@ -1671,6 +1671,12 @@ package body Freeze is
--  type declaration that generates inherited operation. For
--  a null procedure, the declaration implies a null body.
 
+   --  Before insertion, do some minimal decoration of fields
+
+   Mutate_Ekind (New_Id, Ekind (Par_Prim));
+   Set_LSP_Subprogram (New_Id, Par_Prim);
+   Set_Is_Wrapper (New_Id);
+
if Nkind (New_Spec) = N_Procedure_Specification
  and then Null_Present (New_Spec)
then
@@ -1684,12 +1690,6 @@ package body Freeze is
 Build_Class_Wide_Clone_Call
   (Loc, Decls, Par_Prim, New_Spec);
 
-  --  Adding minimum decoration
-
-  Mutate_Ekind (New_Id, Ekind (Par_Prim));
-  Set_LSP_Subprogram (New_Id, Par_Prim);
-  Set_Is_Wrapper (New_Id);
-
   Insert_List_After_And_Analyze
 (Par_R, New_List (New_Decl, New_Body));
 


diff --git a/gcc/ada/sem_attr.adb b/gcc/ada/sem_attr.adb
--- a/gcc/ada/sem_attr.adb
+++ b/gcc/ada/sem_attr.adb
@@ -11499,6 +11499,25 @@ package body Sem_Attr is
Error_Msg_F ("context requires a non-protected subprogram", P);
 end if;
 
+--  AI12-0412: The rule in RM 6.1.1(18.2/5) disallows applying
+--  attribute Access to a primitive of an abstract type when the
+--  primitive has any Pre'Class or Post'Class aspects specified
+--  with nonstatic expressions

[Ada] Code cleanups in System.Atomic_Counters

2021-07-07 Thread Pierre-Marie de Rodat
In particular, now that we are using Atomic_Unsigned which is marked
Atomic, we no longer need to mark Atomic_Counter.Value explicitly
atomic.

We can also get rid of all uses of 'Unrestricted_Access.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* libgnat/s-atocou.ads, libgnat/s-atocou__builtin.adb: Code
cleanups.
diff --git a/gcc/ada/libgnat/s-atocou.ads b/gcc/ada/libgnat/s-atocou.ads
--- a/gcc/ada/libgnat/s-atocou.ads
+++ b/gcc/ada/libgnat/s-atocou.ads
@@ -101,7 +101,6 @@ private
 
type Atomic_Counter is record
   Value : aliased Atomic_Unsigned := 1;
-  pragma Atomic (Value);
end record;
 
 end System.Atomic_Counters;


diff --git a/gcc/ada/libgnat/s-atocou__builtin.adb b/gcc/ada/libgnat/s-atocou__builtin.adb
--- a/gcc/ada/libgnat/s-atocou__builtin.adb
+++ b/gcc/ada/libgnat/s-atocou__builtin.adb
@@ -51,24 +51,19 @@ package body System.Atomic_Counters is
 
procedure Decrement (Item : aliased in out Atomic_Unsigned) is
begin
-  if Sync_Sub_And_Fetch (Item'Unrestricted_Access, 1) = 0 then
+  if Sync_Sub_And_Fetch (Item'Unchecked_Access, 1) = 0 then
  null;
   end if;
end Decrement;
 
function Decrement (Item : aliased in out Atomic_Unsigned) return Boolean is
begin
-  return Sync_Sub_And_Fetch (Item'Unrestricted_Access, 1) = 0;
+  return Sync_Sub_And_Fetch (Item'Unchecked_Access, 1) = 0;
end Decrement;
 
function Decrement (Item : in out Atomic_Counter) return Boolean is
begin
-  --  Note: the use of Unrestricted_Access here is required because we
-  --  are obtaining an access-to-volatile pointer to a non-volatile object.
-  --  This is not allowed for [Unchecked_]Access, but is safe in this case
-  --  because we know that no aliases are being created.
-
-  return Sync_Sub_And_Fetch (Item.Value'Unrestricted_Access, 1) = 0;
+  return Sync_Sub_And_Fetch (Item.Value'Unchecked_Access, 1) = 0;
end Decrement;
 
---
@@ -77,17 +72,12 @@ package body System.Atomic_Counters is
 
procedure Increment (Item : aliased in out Atomic_Unsigned) is
begin
-  Sync_Add_And_Fetch (Item'Unrestricted_Access, 1);
+  Sync_Add_And_Fetch (Item'Unchecked_Access, 1);
end Increment;
 
procedure Increment (Item : in out Atomic_Counter) is
begin
-  --  Note: the use of Unrestricted_Access here is required because we are
-  --  obtaining an access-to-volatile pointer to a non-volatile object.
-  --  This is not allowed for [Unchecked_]Access, but is safe in this case
-  --  because we know that no aliases are being created.
-
-  Sync_Add_And_Fetch (Item.Value'Unrestricted_Access, 1);
+  Sync_Add_And_Fetch (Item.Value'Unchecked_Access, 1);
end Increment;
 





[Ada] Minor code cleanup

2021-07-07 Thread Pierre-Marie de Rodat
To help codepeer analysis.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* libgnat/g-debpoo.adb (Code_Address_For_Allocate_End): Default
Initialize.
diff --git a/gcc/ada/libgnat/g-debpoo.adb b/gcc/ada/libgnat/g-debpoo.adb
--- a/gcc/ada/libgnat/g-debpoo.adb
+++ b/gcc/ada/libgnat/g-debpoo.adb
@@ -362,7 +362,7 @@ package body GNAT.Debug_Pools is
--  These procedures are used as markers when computing the stacktraces,
--  so that addresses in the debug pool itself are not reported to the user.
 
-   Code_Address_For_Allocate_End: System.Address;
+   Code_Address_For_Allocate_End: System.Address := System.Null_Address;
Code_Address_For_Deallocate_End  : System.Address;
Code_Address_For_Dereference_End : System.Address;
--  Taking the address of the above procedures will not work on some




[Ada] Simplify code by reusing Remove on list of primitive operations

2021-07-07 Thread Pierre-Marie de Rodat
Code cleanup; semantics is unaffected.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_prag.adb (Analyze_Pragma): Simplify processing of pragma
CPP_Constructor.
diff --git a/gcc/ada/sem_prag.adb b/gcc/ada/sem_prag.adb
--- a/gcc/ada/sem_prag.adb
+++ b/gcc/ada/sem_prag.adb
@@ -14738,7 +14738,6 @@ package body Sem_Prag is
  --[, [Link_Name =>] static_string_EXPRESSION ]);
 
  when Pragma_CPP_Constructor => CPP_Constructor : declare
-Elmt: Elmt_Id;
 Id  : Entity_Id;
 Def_Id  : Entity_Id;
 Tag_Typ : Entity_Id;
@@ -14805,12 +14804,7 @@ package body Sem_Prag is
then
   Tag_Typ := Etype (Def_Id);
 
-  Elmt := First_Elmt (Primitive_Operations (Tag_Typ));
-  while Present (Elmt) and then Node (Elmt) /= Def_Id loop
- Next_Elmt (Elmt);
-  end loop;
-
-  Remove_Elmt (Primitive_Operations (Tag_Typ), Elmt);
+  Remove (Primitive_Operations (Tag_Typ), Def_Id);
   Set_Is_Dispatching_Operation (Def_Id, False);
end if;
 




[Ada] Optimize away certain elaboration checks

2021-07-07 Thread Pierre-Marie de Rodat
The body of every primitive subprogram contains an elaboration check, in
case a dispatching call is made. These checks happen even in the static
model. This patch removes the checks if pragma Pure or Preelaborate is
present, because they cannot fail in that case.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* checks.adb (Install_Primitive_Elaboration_Check): Do not
generate elaboration checks for primitives if pragma Pure or
Preelaborate is present.  Misc comment fixes, including
referring to the correct kind of check (elaboration, not
accessibility).
* checks.ads, restrict.adb, sem_cat.ads, sinfo.ads: Minor
reformatting and comment fixes.
diff --git a/gcc/ada/checks.adb b/gcc/ada/checks.adb
--- a/gcc/ada/checks.adb
+++ b/gcc/ada/checks.adb
@@ -48,6 +48,7 @@ with Sem;use Sem;
 with Sem_Aux;use Sem_Aux;
 with Sem_Ch3;use Sem_Ch3;
 with Sem_Ch8;use Sem_Ch8;
+with Sem_Cat;use Sem_Cat;
 with Sem_Disp;   use Sem_Disp;
 with Sem_Eval;   use Sem_Eval;
 with Sem_Mech;   use Sem_Mech;
@@ -84,7 +85,7 @@ package body Checks is
--  such as Apply_Scalar_Range_Check that do not insert any code can be
--  safely called even when the Expander is inactive (but Errors_Detected
--  is 0). The benefit of executing this code when expansion is off, is
-   --  the ability to emit constraint error warning for static expressions
+   --  the ability to emit constraint error warnings for static expressions
--  even when we are not generating code.
 
--  The above is modified in gnatprove mode to ensure that proper check
@@ -8634,7 +8635,7 @@ package body Checks is
  return;
 
   --  Do not generate an elaboration check if the related subprogram is
-  --  not subjected to accessibility checks.
+  --  not subject to elaboration checks.
 
   elsif Elaboration_Checks_Suppressed (Subp_Id) then
  return;
@@ -8644,14 +8645,20 @@ package body Checks is
   elsif Restriction_Active (No_Elaboration_Code) then
  return;
 
+  --  If pragma Pure or Preelaborate applies, then these elaboration checks
+  --  cannot fail, so do not generate them.
+
+  elsif In_Preelaborated_Unit then
+ return;
+
   --  Do not generate an elaboration check if exceptions cannot be used,
   --  caught, or propagated.
 
   elsif not Exceptions_OK then
  return;
 
-  --  Do not consider subprograms which act as compilation units, because
-  --  they cannot be the target of a dispatching call.
+  --  Do not consider subprograms that are compilation units, because they
+  --  cannot be the target of a dispatching call.
 
   elsif Nkind (Context) = N_Compilation_Unit then
  return;
@@ -8681,10 +8688,10 @@ package body Checks is
   elsif Analyzed (Subp_Body) then
  return;
 
-  --  Do not consider primitives which occur within an instance that acts
-  --  as a compilation unit. Such an instance defines its spec and body out
-  --  of order (body is first) within the tree, which causes the reference
-  --  to the elaboration flag to appear as an undefined symbol.
+  --  Do not consider primitives that occur within an instance that is a
+  --  compilation unit. Such an instance defines its spec and body out of
+  --  order (body is first) within the tree, which causes the reference to
+  --  the elaboration flag to appear as an undefined symbol.
 
   elsif Within_Compilation_Unit_Instance (Subp_Id) then
  return;


diff --git a/gcc/ada/checks.ads b/gcc/ada/checks.ads
--- a/gcc/ada/checks.ads
+++ b/gcc/ada/checks.ads
@@ -357,7 +357,7 @@ package Checks is
--  if so inserts the appropriate run-time check.
 
procedure Install_Primitive_Elaboration_Check (Subp_Body : Node_Id);
-   --  Insert a check which ensures that subprogram body Subp_Body has been
+   --  Insert a check to ensure that subprogram body Subp_Body has been
--  properly elaborated. The check is installed only when Subp_Body is the
--  body of a nonabstract library-level primitive of a tagged type. Further
--  restrictions may apply, see the body for details.


diff --git a/gcc/ada/restrict.adb b/gcc/ada/restrict.adb
--- a/gcc/ada/restrict.adb
+++ b/gcc/ada/restrict.adb
@@ -396,10 +396,9 @@ package body Restrict is
   N : Node_Id;
   V : Uint := Uint_Minus_1)
is
-  Msg_Issued : Boolean;
-  pragma Unreferenced (Msg_Issued);
+  Ignore_Msg_Issued : Boolean;
begin
-  Check_Restriction (Msg_Issued, R, N, V);
+  Check_Restriction (Ignore_Msg_Issued, R, N, V);
end Check_Restriction;
 
procedure Check_Restriction


diff --git a/gcc/ada/sem_cat.ads b/gcc/ada/sem_cat.ads
--- a/gcc/ada/sem_cat.ads
+++ b/gcc/ada/sem_cat.ads
@@ -27,7 +27,7 @@
 --  the semantic restrictions required for the categorization pragmas:
 --
 --Preelaborate
---Pure,
+--   

[Ada] Keepalive control on Windows

2021-07-07 Thread Pierre-Marie de Rodat
Windows headers in GCC could miss some available constants. Hardcode
TCP_KEEPCNT, TCP_KEEPIDLE, and TCP_KEEPINTVL constants for such cases.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* s-oscons-tmplt.c (TCP_KEEPCNT, TCP_KEEPIDLE, TCP_KEEPINTVL):
Hardcode on Windows if undefined.
diff --git a/gcc/ada/s-oscons-tmplt.c b/gcc/ada/s-oscons-tmplt.c
--- a/gcc/ada/s-oscons-tmplt.c
+++ b/gcc/ada/s-oscons-tmplt.c
@@ -1502,18 +1502,36 @@ CNS(MSG_Forced_Flags, "")
 CND(TCP_NODELAY, "Do not coalesce packets")
 
 #ifndef TCP_KEEPCNT
+#ifdef __MINGW32__
+/* Windows headers can be too old to have all available constants.
+ * We know this one. */
+# define TCP_KEEPCNT 16
+#else
 # define TCP_KEEPCNT -1
 #endif
+#endif
 CND(TCP_KEEPCNT, "Maximum number of keepalive probes")
 
 #ifndef TCP_KEEPIDLE
+#ifdef __MINGW32__
+/* Windows headers can be too old to have all available constants.
+ * We know this one. */
+# define TCP_KEEPIDLE 3
+#else
 # define TCP_KEEPIDLE -1
 #endif
+#endif
 CND(TCP_KEEPIDLE, "Idle time before TCP starts sending keepalive probes")
 
 #ifndef TCP_KEEPINTVL
+#ifdef __MINGW32__
+/* Windows headers can be too old to have all available constants.
+ * We know this one. */
+# define TCP_KEEPINTVL 17
+#else
 # define TCP_KEEPINTVL -1
 #endif
+#endif
 CND(TCP_KEEPINTVL, "Time between individual keepalive probes")
 
 #ifndef SO_REUSEADDR




[Ada] Front-end inlining and instantiations of UC

2021-07-07 Thread Pierre-Marie de Rodat
A recent change exposed a latent bug where the Is_Intrinsic_Subprogram
flag was not propagated properly, leading to errors from the front-end
inlining of the form:

   cannot inline "xxx" (nested function instantiation)

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch12.adb (Analyze_Subprogram_Instantiation): Mark Anon_Id
intrinsic before calling Analyze_Instance_And_Renamings because
this flag may be propagated to other nodes.
diff --git a/gcc/ada/sem_ch12.adb b/gcc/ada/sem_ch12.adb
--- a/gcc/ada/sem_ch12.adb
+++ b/gcc/ada/sem_ch12.adb
@@ -5796,6 +5796,14 @@ package body Sem_Ch12 is
 Set_SPARK_Mode (Gen_Unit);
  end if;
 
+ --  Need to mark Anon_Id intrinsic before calling
+ --  Analyze_Instance_And_Renamings because this flag may be propagated
+ --  to other nodes.
+
+ if Is_Intrinsic_Subprogram (Gen_Unit) then
+Set_Is_Intrinsic_Subprogram (Anon_Id);
+ end if;
+
  Analyze_Instance_And_Renamings;
 
  --  Restore SPARK_Mode from the context after analysis of the package
@@ -5817,7 +5825,6 @@ package body Sem_Ch12 is
  --  not within the main unit.
 
  if Is_Intrinsic_Subprogram (Gen_Unit) then
-Set_Is_Intrinsic_Subprogram (Anon_Id);
 Set_Is_Intrinsic_Subprogram (Act_Decl_Id);
 
 if Chars (Gen_Unit) = Name_Unchecked_Conversion then




[Ada] Stronger assertion about flag for checking static expressions

2021-07-07 Thread Pierre-Marie de Rodat
Ensure that Checking_For_Potentially_Static_Expression flag is
manipulated in a stack-like manner (with stack depth 1 at the most). The
previous assertion didn't prevent us from setting the flag to True or to
False twice in a row.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_eval.adb (Set_Checking_Potentially_Static_Expression):
Stronger assertion.
diff --git a/gcc/ada/sem_eval.adb b/gcc/ada/sem_eval.adb
--- a/gcc/ada/sem_eval.adb
+++ b/gcc/ada/sem_eval.adb
@@ -6481,11 +6481,10 @@ package body Sem_Eval is
 
procedure Set_Checking_Potentially_Static_Expression (Value : Boolean) is
begin
-  --  Verify that we're not currently checking for a potentially static
-  --  expression unless we're disabling such checking.
+  --  Verify that we only start/stop checking for a potentially static
+  --  expression and do not start or stop it twice in a row.
 
-  pragma Assert
-(not Checking_For_Potentially_Static_Expression or else not Value);
+  pragma Assert (Checking_For_Potentially_Static_Expression /= Value);
 
   Checking_For_Potentially_Static_Expression := Value;
end Set_Checking_Potentially_Static_Expression;




[Ada] Assertion errors on concurrent types with -gnatc and extensions enabled

2021-07-07 Thread Pierre-Marie de Rodat
When expansion is disabled (such as with -gnatc), there are cases where
uses of concurrent types can lead to an Assertion_Failure when
extensions are enabled (by use of -gnatX, or due to instantiation of a
predefined library generic, such as Unchecked_Conversion), because
Primitive_Operations can return No_Elist and support for the object.op
feature for untagged types can lead to checking that list. This is fixed
by ensuring that the Direct_Primitive_Operations list is always
initialized to an empty list (New_Elmt_List) for concurrent types and by
having function Primitive_Operations return that list rather than
No_Elist.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* einfo-utils.adb (Primitive_Operations): Default to returning
Direct_Primitive_Operations in the case of concurrent types
(when Corresponding_Record_Type not present).
* sem_ch9.adb (Analyze_Protected_Type_Declaration): Initialize
Direct_Primitive_Operations to an empty element list.
(Analyze_Task_Type_Declaration): Initialize
Direct_Primitive_Operations to an empty element list.
diff --git a/gcc/ada/einfo-utils.adb b/gcc/ada/einfo-utils.adb
--- a/gcc/ada/einfo-utils.adb
+++ b/gcc/ada/einfo-utils.adb
@@ -2493,15 +2493,15 @@ package body Einfo.Utils is
 return Direct_Primitive_Operations
   (Corresponding_Record_Type (Id));
 
- --  If expansion is disabled the corresponding record type is absent,
- --  but if the type has ancestors it may have primitive operations.
-
- elsif Is_Tagged_Type (Id) then
-return Direct_Primitive_Operations (Id);
+ --  When expansion is disabled, the corresponding record type is
+ --  absent, but if this is a tagged type with ancestors, or if the
+ --  extension of prefixed calls for untagged types is enabled, then
+ --  it may have associated primitive operations.
 
  else
-return No_Elist;
+return Direct_Primitive_Operations (Id);
  end if;
+
   else
  return Direct_Primitive_Operations (Id);
   end if;


diff --git a/gcc/ada/sem_ch9.adb b/gcc/ada/sem_ch9.adb
--- a/gcc/ada/sem_ch9.adb
+++ b/gcc/ada/sem_ch9.adb
@@ -2031,6 +2031,12 @@ package body Sem_Ch9 is
   Set_Has_Delayed_Freeze (T);
   Set_Stored_Constraint  (T, No_Elist);
 
+  --  Initialize type's primitive operations list, for possible use when
+  --  the extension of prefixed call notation for untagged types is enabled
+  --  (such as by use of -gnatX).
+
+  Set_Direct_Primitive_Operations (T, New_Elmt_List);
+
   --  Mark this type as a protected type for the sake of restrictions,
   --  unless the protected type is declared in a private part of a package
   --  of the runtime. With this exception, the Suspension_Object from
@@ -3152,6 +3158,12 @@ package body Sem_Ch9 is
   Set_Has_Delayed_Freeze (T, True);
   Set_Stored_Constraint  (T, No_Elist);
 
+  --  Initialize type's primitive operations list, for possible use when
+  --  the extension of prefixed call notation for untagged types is enabled
+  --  (such as by use of -gnatX).
+
+  Set_Direct_Primitive_Operations (T, New_Elmt_List);
+
   --  Set the SPARK_Mode from the current context (may be overwritten later
   --  with an explicit pragma).
 




[Ada] Remove unused define

2021-07-07 Thread Pierre-Marie de Rodat
__MINWGW32__ is a typo, so the #ifdef never took effect; in any case,
MSG_WAITALL is already defined on Windows.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* s-oscons-tmplt.c (MSG_WAITALL): Remove wrong #ifdef
__MINWGW32__.
diff --git a/gcc/ada/s-oscons-tmplt.c b/gcc/ada/s-oscons-tmplt.c
--- a/gcc/ada/s-oscons-tmplt.c
+++ b/gcc/ada/s-oscons-tmplt.c
@@ -1463,14 +1463,8 @@ CND(MSG_PEEK, "Peek at incoming data")
 CND(MSG_EOR, "Send end of record")
 
 #ifndef MSG_WAITALL
-#ifdef __MINWGW32__
-/* The value of MSG_WAITALL is 8.  Nevertheless winsock.h doesn't
-   define it, but it is still usable as we link to winsock2 API.  */
-# define MSG_WAITALL (1 << 3)
-#else
 # define MSG_WAITALL -1
 #endif
-#endif
 CND(MSG_WAITALL, "Wait for full reception")
 
 #ifndef MSG_NOSIGNAL




Re: [PATCH] test/rs6000: Add case to cover vector division

2021-07-07 Thread Segher Boessenkool
On Wed, Jul 07, 2021 at 05:03:07PM +0800, Kewen.Lin wrote:
> This patch is to add one test case to check if vectorizer
> can exploit vector division instructions newly introduced
> by Power10.

Okay, great, thanks!


Segher


Re: [PATCH] rs6000: Add MMA __builtin_vsx_lxvp and __builtin_vsx_stxvp built-ins

2021-07-07 Thread Peter Bergner via Gcc-patches
On 7/6/21 5:05 PM, Segher Boessenkool wrote:
> On Tue, Jul 06, 2021 at 04:13:06PM -0500, Peter Bergner wrote:
>> On 7/6/21 3:08 PM, Bill Schmidt wrote:
>>> On 7/6/21 2:29 PM, Peter Bergner wrote:
>>>> These are RS6000_BTC_GIMPLE, so I think they should be handled within
>>>> the "if (gimple_func) ..." leg.  That said, how about the following
>>>> change to resolve the issue you have?  I'll kick off a bootstrap and
>>>> regtest for this change.
>>>
>>>
>>> Thanks, yes, that works for me!
>>
>> Great, thanks!
>>
>>
>> Segher, the patch was clean on testing.  Ok with you too?
> 
> Of course.  Okay for trunk and backports (you might want to hurry it
> for 11.2).  Thanks to you both!

Ok, committed to trunk.  I'm testing the GCC 11 backport now and will
commit if clean.  Thanks.


Peter



Re: [PATCH] rs6000: Support [u]mod<mode>3 for vector modulo insns

2021-07-07 Thread Segher Boessenkool
Hi!

On Wed, Jul 07, 2021 at 05:03:23PM +0800, Kewen.Lin wrote:
> This patch is to make Power10 newly introduced vector
> modulo instructions exploited in vectorized loops, it
> just simply renames existing define_insns as standard
> pattern names.

> 
> Is it ok for trunk?
> 
> BR,
> Kewen
> -
> gcc/ChangeLog:
> 
>   * config/rs6000/rs6000-builtin.def (MODS_V2DI, MODS_V4SI, MODU_V2DI,
>   MODU_V4SI): Adjust.
>   * config/rs6000/vsx.md (mods_<mode>): Renamed to...
>   (mod<mode>3): ... this.
>   (modu_<mode>): Renamed to...
>   (umod<mode>3): ... this.

("Rename", not "Renamed")

(It reads better if you put the "Adjust" entry after the rest, btw)

So I suppose the new testcase FAILs without these changes?

Okay for trunk.  Thanks!


Segher


Re: [PATCH 1/2] CALL_INSN may not be a real function call.

2021-07-07 Thread Jeff Law via Gcc-patches




On 7/7/2021 8:55 AM, Segher Boessenkool wrote:

On Mon, Jul 05, 2021 at 06:03:21PM -0600, Jeff Law wrote:

It reminds me a bit of millicode calls on the PA or calls to special
routines in libgcc.  They're calls to functions, but those functions do
not follow the standard ABI.

Something with CALL_INSN_FUNCTION_USAGE?  And maybe some clobbers?
I don't remember all the details on the PA side and the decision to 
express mul, div, mod as regular insns pre-dates my involvement in the 
PA port (hard to believe, but true).  I'd hazard a guess the goal behind 
making them regular insns was to not inhibit leaf function detection, 
avoid caller-saves around the "calls" and such. CALL_INSN_FUNCTION_USAGE 
didn't exist until the mid 90s.  I wouldn't be surprised if we could 
migrate the millicode calls to the CALL_INSN_FUNCTION_USAGE model.



jeff


[Patch] C, C++, Fortran, OpenMP: Add support for device-modifiers for 'omp target device'

2021-07-07 Thread Marcel Vollweiler

This patch adds device-modifiers to the device clause:

   #pragma omp target device ([ device-modifier :] integer-expression)

where device-modifier is either 'ancestor' or 'device_num'.

The 'device_num' case

   #pragma omp target device (device_num : integer-expression)

is treated in the same way as

   #pragma omp target device (integer-expression)

before.

For the 'ancestor' case

   #pragma omp target device (ancestor: integer-expression)

a message 'sorry, not yet implemented' is output.


-
Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München 
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank 
Thürauf
OpenMP: Add support for device-modifiers for 'omp target device'

gcc/c/ChangeLog:

* c-parser.c (c_parser_omp_clause_device): Add support for 
device-modifiers for 'omp target device'.

gcc/cp/ChangeLog:

* parser.c (cp_parser_omp_clause_device): Add support for 
device-modifiers for 'omp target device'.

gcc/fortran/ChangeLog:

* openmp.c (gfc_match_omp_clauses): Add support for 
device-modifiers for 'omp target device'.

gcc/testsuite/ChangeLog:

* c-c++-common/gomp/target-device-1.c: New test.
* c-c++-common/gomp/target-device-2.c: New test.
* gfortran.dg/gomp/target-device-1.f90: New test.
* gfortran.dg/gomp/target-device-2.f90: New test.

diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 9a56e0c..defc52d 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -15864,37 +15864,117 @@ c_parser_omp_clause_map (c_parser *parser, tree list)
 }
 
 /* OpenMP 4.0:
-   device ( expression ) */
+   device ( expression )
+
+   OpenMP 5.0:
+   device ( [device-modifier :] integer-expression )
+
+   device-modifier:
+ ancestor | device_num */
 
 static tree
 c_parser_omp_clause_device (c_parser *parser, tree list)
 {
   location_t clause_loc = c_parser_peek_token (parser)->location;
+  location_t expr_loc;
+  c_expr expr;
+  tree c, t;
+
   matching_parens parens;
-  if (parens.require_open (parser))
+  if (!parens.require_open (parser))
+return list;
+
+  int pos = 1;
+  int pos_colon = 0;
+  while (c_parser_peek_nth_token_raw (parser, pos)->type == CPP_NAME
+|| c_parser_peek_nth_token_raw (parser, pos)->type == CPP_COLON
+|| c_parser_peek_nth_token_raw (parser, pos)->type == CPP_COMMA)
 {
-  location_t expr_loc = c_parser_peek_token (parser)->location;
-  c_expr expr = c_parser_expr_no_commas (parser, NULL);
-  expr = convert_lvalue_to_rvalue (expr_loc, expr, false, true);
-  tree c, t = expr.value;
-  t = c_fully_fold (t, false, NULL);
+  if (c_parser_peek_nth_token_raw (parser, pos)->type == CPP_COLON)
+   {
+ pos_colon = pos;
+ break;
+   }
+  pos++;
+}
 
-  parens.skip_until_found_close (parser);
+  const char *err_msg;
+  if (pos_colon == 1)
+{
+  err_msg = "expected device-modifier % or %";
+  goto invalid_kind;
+}
 
-  if (!INTEGRAL_TYPE_P (TREE_TYPE (t)))
+  if (pos_colon > 1)
+{
+  if (c_parser_peek_nth_token_raw (parser, 1)->type == CPP_NAME)
{
- c_parser_error (parser, "expected integer expression");
- return list;
+ c_token *tok = c_parser_peek_token (parser);
+ const char *p = IDENTIFIER_POINTER (tok->value);
+ if (strcmp ("ancestor", p) == 0)
+   {
+ if (pos_colon > 2)
+   {
+ err_msg = "expected only one device-modifier % or "
+   "%";
+ goto invalid_kind;
+   }
+
+ sorry_at (tok->location, "% not yet supported");
+ c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL);
+ return list;
+   }
+ else if (strcmp ("device_num", p) == 0)
+   {
+ if (pos_colon > 2)
+   {
+ err_msg = "expected only one device-modifier % or "
+   "%";
+ goto invalid_kind;
+   }
+ c_parser_consume_token (parser);
+ c_parser_peek_token (parser);
+ c_parser_consume_token (parser);
+   }
+ else
+   {
+ err_msg = "expected device-modifier % or "
+   "%";
+ goto invalid_kind;
+   }
+   }
+  else
+   {
+ err_msg = "expected device-modifier % or %";
+ goto invalid_kind;
}
+}
 
-  check_no_duplicate_clause (list, OMP_CLAUSE_DEVICE, "device");
+  expr_loc = c_parser_peek_token (parser)->location;
+  expr = c_parser_expr_no_commas (parser, NULL);
+  expr = convert_lvalue_to_rvalue (expr_loc, expr, false, true);
+  c, t = expr.value;
+  t = c_fully_fold (t, false, NULL);
 
-  c = build_omp_clause (clause_loc, OMP_CLAUSE_DEVICE);
-  OMP_CLAUSE_DEVICE_ID (c) = t;
-  OMP_CLAUSE_CHAIN (c) = list;
-  list = c;

Re: [PATCH] rs6000: Add MMA __builtin_vsx_lxvp and __builtin_vsx_stxvp built-ins

2021-07-07 Thread Peter Bergner via Gcc-patches
On 7/7/21 11:55 AM, Peter Bergner wrote:
> On 7/6/21 5:05 PM, Segher Boessenkool wrote:
>> On Tue, Jul 06, 2021 at 04:13:06PM -0500, Peter Bergner wrote:
>>> On 7/6/21 3:08 PM, Bill Schmidt wrote:
>>>> On 7/6/21 2:29 PM, Peter Bergner wrote:
>>>>> These are RS6000_BTC_GIMPLE, so I think they should be handled within
>>>>> the "if (gimple_func) ..." leg.  That said, how about the following
>>>>> change to resolve the issue you have?  I'll kick off a bootstrap and
>>>>> regtest for this change.
>>>>
>>>>
>>>> Thanks, yes, that works for me!
>>>
>>> Great, thanks!
>>>
>>>
>>> Segher, the patch was clean on testing.  Ok with you too?
>>
>> Of course.  Okay for trunk and backports (you might want to hurry it
>> for 11.2).  Thanks to you both!
> 
> Ok, committed to trunk.  I'm testing the GCC 11 backport now and will
> commit if clean.  Thanks.

Backport testing was clean, so pushed to the GCC 11 branch.
I'll work on the GCC 10 backport in a day or two.

Peter




[PATCH v2] IBM Z: Use @PLT symbols for local functions in 64-bit mode

2021-07-07 Thread Ilya Leoshkevich via Gcc-patches
Bootstrapped and regtested on s390x-redhat-linux.  Ok for master?

v1: https://gcc.gnu.org/pipermail/gcc-patches/2021-June/573614.html
v1 -> v2: Do not use UNSPEC_PLT in 64-bit code and rename it to
  UNSPEC_PLT31 (Ulrich, Andreas).  Do not append @PLT only to
  weak symbols in non-PIC code (Ulrich).  Add TLS tests.



This helps with generating code for kernel hotpatches, which contain
individual functions and are loaded more than 2G away from vmlinux.
This should not create performance regressions for the normal use
cases, because for local functions ld replaces @PLT calls with direct
calls.

gcc/ChangeLog:

* config/s390/predicates.md (bras_sym_operand): Accept all
functions in 64-bit mode, use UNSPEC_PLT31.
(larl_operand): Use UNSPEC_PLT31.
* config/s390/s390.c (s390_loadrelative_operand_p): Likewise.
(legitimize_pic_address): Likewise.
(s390_emit_tls_call_insn): Mark __tls_get_offset as function,
use UNSPEC_PLT31.
(s390_delegitimize_address): Use UNSPEC_PLT31.
(s390_output_addr_const_extra): Likewise.
(print_operand): Add @PLT to TLS calls, handle %K.
(s390_function_profiler): Mark __fentry__/_mcount as function,
use UNSPEC_PLT31.
(s390_output_mi_thunk): Use only UNSPEC_GOT.
(s390_emit_call): Use UNSPEC_PLT31.
(s390_emit_tpf_eh_return): Mark __tpf_eh_return as function.
* config/s390/s390.md (UNSPEC_PLT31): Rename from UNSPEC_PLT.
(*movdi_64): Use %K.
(reload_base_64): Likewise.
(*sibcall_brc): Likewise.
(*sibcall_brcl): Likewise.
(*sibcall_value_brc): Likewise.
(*sibcall_value_brcl): Likewise.
(*bras): Likewise.
(*brasl): Likewise.
(*bras_r): Likewise.
(*brasl_r): Likewise.
(*bras_tls): Likewise.
(*brasl_tls): Likewise.
(main_base_64): Likewise.
(reload_base_64): Likewise.
(@split_stack_call): Likewise.

gcc/testsuite/ChangeLog:

* g++.dg/ext/visibility/noPLT.C: Skip on s390x.
* gcc.target/s390/nodatarel-1.c: Move foostatic to the new
tests.
* gcc.target/s390/pr80080-4.c: Allow @PLT suffix.
* gcc.target/s390/risbg-ll-3.c: Likewise.
* gcc.target/s390/call.h: Common code for the new tests.
* gcc.target/s390/call31-z10-pic-nodatarel.c: New test.
* gcc.target/s390/call31-z10-pic.c: New test.
* gcc.target/s390/call31-z10.c: New test.
* gcc.target/s390/call31-z9-pic-nodatarel.c: New test.
* gcc.target/s390/call31-z9-pic.c: New test.
* gcc.target/s390/call31-z9.c: New test.
* gcc.target/s390/call64-z10-pic-nodatarel.c: New test.
* gcc.target/s390/call64-z10-pic.c: New test.
* gcc.target/s390/call64-z10.c: New test.
* gcc.target/s390/call64-z9-pic-nodatarel.c: New test.
* gcc.target/s390/call64-z9-pic.c: New test.
* gcc.target/s390/call64-z9.c: New test.
* gcc.target/s390/tls.h: Common code for the new TLS tests.
* gcc.target/s390/tls31-pic.c: New test.
* gcc.target/s390/tls31.c: New test.
* gcc.target/s390/tls64-pic.c: New test.
* gcc.target/s390/tls64.c: New test.
---
 gcc/config/s390/predicates.md |  9 ++-
 gcc/config/s390/s390.c| 73 ++-
 gcc/config/s390/s390.md   | 32 
 gcc/testsuite/g++.dg/ext/visibility/noPLT.C   |  2 +-
 gcc/testsuite/gcc.target/s390/call.h  | 40 ++
 .../s390/call31-z10-pic-nodatarel.c   | 16 
 .../gcc.target/s390/call31-z10-pic.c  | 16 
 gcc/testsuite/gcc.target/s390/call31-z10.c| 15 
 .../gcc.target/s390/call31-z9-pic-nodatarel.c | 16 
 gcc/testsuite/gcc.target/s390/call31-z9-pic.c | 16 
 gcc/testsuite/gcc.target/s390/call31-z9.c | 15 
 .../s390/call64-z10-pic-nodatarel.c   | 17 +
 .../gcc.target/s390/call64-z10-pic.c  | 17 +
 gcc/testsuite/gcc.target/s390/call64-z10.c| 15 
 .../gcc.target/s390/call64-z9-pic-nodatarel.c | 17 +
 gcc/testsuite/gcc.target/s390/call64-z9-pic.c | 17 +
 gcc/testsuite/gcc.target/s390/call64-z9.c | 15 
 gcc/testsuite/gcc.target/s390/nodatarel-1.c   | 26 +--
 gcc/testsuite/gcc.target/s390/pr80080-4.c |  2 +-
 gcc/testsuite/gcc.target/s390/risbg-ll-3.c|  6 +-
 gcc/testsuite/gcc.target/s390/tls.h   | 23 ++
 gcc/testsuite/gcc.target/s390/tls31-pic.c | 14 
 gcc/testsuite/gcc.target/s390/tls31.c |  9 +++
 gcc/testsuite/gcc.target/s390/tls64-pic.c | 14 
 gcc/testsuite/gcc.target/s390/tls64.c |  9 +++
 25 files changed, 382 insertions(+), 69 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/call.h
 create mode 100644 gcc/testsuite/gcc.target/s390/call31-z10-pic-nodatarel.c
 create mode 100644 gcc/testsuite/gcc.target/s390/call31-z10-pic.c
 create mode 100644 gcc/tes

Re: [PATCH 2/4 REVIEW] libtool.m4: fix nm BSD flag detection

2021-07-07 Thread Nick Alcock via Gcc-patches
On 7 Jul 2021, Nick Clifton told this:

> Hi Nick,
>
>> Ping?
>
> Oops.

I sent a bunch of pings out at the same time, to a bunch of different
projects. You are the only person to respond, so thank you!

>>> PR libctf/27482
>>> * libtool.m4 (LT_PATH_NM): Try BSDization flags with a user-provided
>
> Changes to libtool need to be posted to the libtool project:
>
>   https://www.gnu.org/software/libtool/

I considered this, but there is *serious* divergence between the
libtool.m4 in our tree and upstream. Fixing this divergence looks to be
a fairly major project in and of itself :( the last real sync looked
like being all the way back in 2008.

> They have mailing lists for bug reports and patch submissions.
>
> Once the patch has been accepted there it can be backported to the gcc and
> gdb/binutils repositories...

AIUI, libtool has no maintainer currently, and the divergence is such
that the patch might need a near-complete rewrite anyway :( (there were
changes to this function upstream in 2012 and 2013, long after our last
sync.)

(However, I'm going to try in any case. I may have found a sucker, er, I
mean coworker willing to take this on.)


Repost: [PATCH] Generate 128-bit int divide/modulus on power10.

2021-07-07 Thread Michael Meissner via Gcc-patches
[PATCH] Generate 128-bit int divide/modulus on power10.

This patch adds support for the VDIVSQ, VDIVUQ, VMODSQ, and VMODUQ
instructions to do 128-bit arithmetic.

Ideally this patch can be approved in time to be back ported to GCC 11.2.

Can I check this into the master branch, and eventually into GCC 11?

2021-07-07  Michael Meissner  

gcc/
PR target/100809
* config/rs6000/rs6000.md (udivti3): New insn.
(divti3): New insn.
(umodti3): New insn.
(modti3): New insn.

gcc/testsuite/
PR target/100809
* gcc.target/powerpc/p10-vdivq-vmodq.c: New test.
---
 gcc/config/rs6000/rs6000.md   | 34 +++
 .../gcc.target/powerpc/p10-vdivq-vmodq.c  | 27 +++
 2 files changed, 61 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index e84d0311cc2..4e53cf28dde 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -3234,6 +3234,14 @@ (define_insn "udiv3"
   [(set_attr "type" "div")
(set_attr "size" "")])
 
+(define_insn "udivti3"
+  [(set (match_operand:TI 0 "altivec_register_operand" "=v")
+(udiv:TI (match_operand:TI 1 "altivec_register_operand" "v")
+(match_operand:TI 2 "altivec_register_operand" "v")))]
+  "TARGET_POWER10 && TARGET_POWERPC64"
+  "vdivuq %0,%1,%2"
+  [(set_attr "type" "vecdiv")
+   (set_attr "size" "128")])
 
 ;; For powers of two we can do sra[wd]i/addze for divide and then adjust for
 ;; modulus.  If it isn't a power of two, force operands into register and do
@@ -3324,6 +3332,15 @@ (define_insn_and_split "*div3_sra_dot2"
(set_attr "length" "8,12")
(set_attr "cell_micro" "not")])
 
+(define_insn "divti3"
+  [(set (match_operand:TI 0 "altivec_register_operand" "=v")
+(div:TI (match_operand:TI 1 "altivec_register_operand" "v")
+   (match_operand:TI 2 "altivec_register_operand" "v")))]
+  "TARGET_POWER10 && TARGET_POWERPC64"
+  "vdivsq %0,%1,%2"
+  [(set_attr "type" "vecdiv")
+   (set_attr "size" "128")])
+
 (define_expand "mod3"
   [(set (match_operand:GPR 0 "gpc_reg_operand")
(mod:GPR (match_operand:GPR 1 "gpc_reg_operand")
@@ -3424,6 +3441,23 @@ (define_peephole2
(minus:GPR (match_dup 1)
   (match_dup 3)))])
 
+(define_insn "umodti3"
+  [(set (match_operand:TI 0 "altivec_register_operand" "=v")
+(umod:TI (match_operand:TI 1 "altivec_register_operand" "v")
+(match_operand:TI 2 "altivec_register_operand" "v")))]
+  "TARGET_POWER10 && TARGET_POWERPC64"
+  "vmoduq %0,%1,%2"
+  [(set_attr "type" "vecdiv")
+   (set_attr "size" "128")])
+
+(define_insn "modti3"
+  [(set (match_operand:TI 0 "altivec_register_operand" "=v")
+(mod:TI (match_operand:TI 1 "altivec_register_operand" "v")
+   (match_operand:TI 2 "altivec_register_operand" "v")))]
+  "TARGET_POWER10 && TARGET_POWERPC64"
+  "vmodsq %0,%1,%2"
+  [(set_attr "type" "vecdiv")
+   (set_attr "size" "128")])
 
 ;; Logical instructions
 ;; The logical instructions are mostly combined by using match_operator,
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c 
b/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
new file mode 100644
index 000..cd29b0a4b6b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
@@ -0,0 +1,27 @@
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+unsigned __int128 u_div(unsigned __int128 a, unsigned __int128 b)
+{
+   return a/b;
+}
+
+unsigned __int128 u_mod(unsigned __int128 a, unsigned __int128 b)
+{
+   return a%b;
+}
+__int128 s_div(__int128 a, __int128 b)
+{
+   return a/b;
+}
+
+__int128 s_mod(__int128 a, __int128 b)
+{
+   return a%b;
+}
+
+/* { dg-final { scan-assembler {\mvdivsq\M} } } */
+/* { dg-final { scan-assembler {\mvdivuq\M} } } */
+/* { dg-final { scan-assembler {\mvmodsq\M} } } */
+/* { dg-final { scan-assembler {\mvmoduq\M} } } */
-- 
2.31.1


-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797


Repost: [PATCH] Fix long double tests when default long double is not IBM.

2021-07-07 Thread Michael Meissner via Gcc-patches
[PATCH] Fix long double tests when default long double is not IBM.

This patch adds 3 more selections to target-supports.exp to see if we can force
the compiler to use a particular long double format (IEEE 128-bit, IBM extended
double, 64-bit), and the library support will track the changes for the long
double.  This is needed because two of the tests in the test suite use long
double, and they are actually testing IBM extended double.

This patch also forces the two tests that explicitly require long double
to use the IBM double-double encoding to explicitly run the test.  This
requires GLIBC 2.32 or greater in order to do the switch.

I have run tests on a little endian power9 system with 3 compilers.  There were
no regressions with these patches, and the two tests in the following patches
now work if the default long double is not IBM 128-bit:

*   One compiler used the default IBM 128-bit format;
*   One compiler used the IEEE 128-bit format; (and)
*   One compiler used 64-bit long doubles.

I have also tested compilers on a big endian power8 system with a compiler
defaulting to power8 code generation and another with the default cpu
set.  There were no regressions.

Can I check this patch into the master branch?

2021-07-07  Michael Meissner  

gcc/testsuite/
PR target/70117
* gcc.target/powerpc/pr70117.c: Force the long double type to use
the IBM 128-bit format.
* c-c++-common/dfp/convert-bfp-11.c: Force using IBM 128-bit long
double.  Remove check for 64-bit long double.
* lib/target-supports.exp
(add_options_for_ppc_long_double_override_ibm128): New function.
(check_effective_target_ppc_long_double_override_ibm128): New
function.
(add_options_for_ppc_long_double_override_ieee128): New function.
(check_effective_target_ppc_long_double_override_ieee128): New
function.
(add_options_for_ppc_long_double_override_64bit): New function.
(check_effective_target_ppc_long_double_override_64bit): New
function.
---
 .../c-c++-common/dfp/convert-bfp-11.c |  18 +--
 gcc/testsuite/gcc.target/powerpc/pr70117.c|   6 +-
 gcc/testsuite/lib/target-supports.exp | 107 ++
 3 files changed, 121 insertions(+), 10 deletions(-)

diff --git a/gcc/testsuite/c-c++-common/dfp/convert-bfp-11.c 
b/gcc/testsuite/c-c++-common/dfp/convert-bfp-11.c
index 95c433d2c24..35da07d1fa4 100644
--- a/gcc/testsuite/c-c++-common/dfp/convert-bfp-11.c
+++ b/gcc/testsuite/c-c++-common/dfp/convert-bfp-11.c
@@ -1,9 +1,14 @@
-/* { dg-skip-if "" { ! "powerpc*-*-linux*" } } */
+/* { dg-require-effective-target dfp } */
+/* { dg-require-effective-target ppc_long_double_override_ibm128 } */
+/* { dg-add-options ppc_long_double_override_ibm128 } */
 
-/* Test decimal float conversions to and from IBM 128-bit long double. 
-   Checks are skipped at runtime if long double is not 128 bits.
-   Don't force 128-bit long doubles because runtime support depends
-   on glibc.  */
+/* We force the long double type to be IBM 128-bit because the CONVERT_TO_PINF
+   tests will fail if we use IEEE 128-bit floating point.  This is due to IEEE
+   128-bit having a larger exponent range than IBM 128-bit extended double.  So
+   tests that would generate an infinity with IBM 128-bit will generate a
+   normal number with IEEE 128-bit.  */
+
+/* Test decimal float conversions to and from IBM 128-bit long double.   */
 
 #include "convert.h"
 
@@ -36,9 +41,6 @@ CONVERT_TO_PINF (312, tf, sd, 1.6e+308L, d32)
 int
 main ()
 {
-  if (sizeof (long double) != 16)
-return 0;
-
   convert_101 ();
   convert_102 ();
 
diff --git a/gcc/testsuite/gcc.target/powerpc/pr70117.c 
b/gcc/testsuite/gcc.target/powerpc/pr70117.c
index 3bbd2c595e0..8a5fad1dee0 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr70117.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr70117.c
@@ -1,5 +1,7 @@
-/* { dg-do run { target { powerpc*-*-linux* powerpc*-*-darwin* powerpc*-*-aix* 
rs6000-*-* } } } */
-/* { dg-options "-std=c99 -mlong-double-128 -O2" } */
+/* { dg-do run } */
+/* { dg-require-effective-target ppc_long_double_override_ibm128 } */
+/* { dg-options "-std=c99 -O2" } */
+/* { dg-add-options ppc_long_double_override_ibm128 } */
 
 #include 
 
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 789723fb287..0a392cb0fd5 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2360,6 +2360,113 @@ proc check_effective_target_ppc_ieee128_ok { } {
 }]
 }
 
+# Check if we can explicitly override the long double format to use the IBM
+# 128-bit extended double format, and GLIBC supports doing this override by
+# switching the sprintf to handle IBM 128-bit long double.
+
+proc add_options_for_ppc_long_double_override_ibm128 { flags } {
+if { [istarget powerpc*-*-*] } {
+   return "$flags -mlong-double-128 -Wno-psabi -mabi=ibmlongdouble"
+}
+re

Repost: [PATCH] Change rs6000_const_f32_to_i32 return type.

2021-07-07 Thread Michael Meissner via Gcc-patches
[PATCH] Change rs6000_const_f32_to_i32 return type.

The function rs6000_const_f32_to_i32 called REAL_VALUE_TO_TARGET_SINGLE
with a long long type and returned it.  This patch changes the type to long
which is the proper type for REAL_VALUE_TO_TARGET_SINGLE.

2021-07-07  Michael Meissner  

gcc/
* config/rs6000/rs6000-protos.h (rs6000_const_f32_to_i32): Change
return type to long.
* config/rs6000/rs6000.c (rs6000_const_f32_to_i32): Change return
type to long.
---
 gcc/config/rs6000/rs6000-protos.h | 2 +-
 gcc/config/rs6000/rs6000.c| 6 --
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index 9de294d3b28..94bf961c6b7 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -281,7 +281,7 @@ extern void rs6000_asm_output_dwarf_pcrel (FILE *file, int 
size,
   const char *label);
 extern void rs6000_asm_output_dwarf_datarel (FILE *file, int size,
 const char *label);
-extern long long rs6000_const_f32_to_i32 (rtx operand);
+extern long rs6000_const_f32_to_i32 (rtx operand);
 
 /* Declare functions in rs6000-c.c */
 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 9a5db63d0ef..de11de5e079 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -27936,10 +27936,12 @@ rs6000_invalid_conversion (const_tree fromtype, 
const_tree totype)
   return NULL;
 }
 
-long long
+/* Convert a SFmode constant to the integer bit pattern.  */
+
+long
 rs6000_const_f32_to_i32 (rtx operand)
 {
-  long long value;
+  long value;
   const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);
 
   gcc_assert (GET_MODE (operand) == SFmode);
-- 
2.31.1


-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797


Repost: [PATCH] Fix vec-splati-runnable.c test.

2021-07-07 Thread Michael Meissner via Gcc-patches
[PATCH] Fix vec-splati-runnable.c test.

I noticed that the vec-splati-runnable.c did not have an abort after one
of the tests.  If the test was run with optimization, the optimizer could
delete some of the tests and throw off the count.  However, due to the
fact that the value being loaded in that test is undefined, I did not
check what value was loaded, but I just stored it into a volatile global
variable.

2021-07-07  Michael Meissner  

gcc/testsuite/
* gcc.target/powerpc/vec-splati-runnable.c: Run test with -O2
optimization.  Do not check what XXSPLTIDP generates if the value
is undefined.
---
 .../gcc.target/powerpc/vec-splati-runnable.c  | 29 ++-
 1 file changed, 9 insertions(+), 20 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c 
b/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c
index e84ce77a21d..a135279b1d7 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splati-runnable.c
@@ -1,7 +1,7 @@
 /* { dg-do run { target { power10_hw } } } */
 /* { dg-do link { target { ! power10_hw } } } */
 /* { dg-require-effective-target power10_ok } */
-/* { dg-options "-mdejagnu-cpu=power10 -save-temps" } */
+/* { dg-options "-mdejagnu-cpu=power10 -save-temps -O2" } */
 #include 
 
 #define DEBUG 0
@@ -12,6 +12,8 @@
 
 extern void abort (void);
 
+volatile vector double vresult_d_undefined;
+
 int
 main (int argc, char *argv [])
 {
@@ -85,25 +87,12 @@ main (int argc, char *argv [])
 #endif
   }
 
-  /* This test will generate a "note" to the user that the argument
- is subnormal.  It is not an error, but results are not defined.  */
-  vresult_d = (vector double) { 2.0, 3.0 };
-  expected_vresult_d = (vector double) { 6.6E-42f, 6.6E-42f };
-
-  vresult_d = vec_splatid (6.6E-42f);
-
-  /* Although the instruction says the results are not defined, it does seem
- to work, at least on Mambo.  But no guarentees!  */
-  if (!vec_all_eq (vresult_d,  expected_vresult_d)) {
-#if DEBUG
-printf("ERROR, vec_splati (6.6E-42f)\n");
-for(i = 0; i < 2; i++)
-  printf(" vresult_d[%i] = %e, expected_vresult_d[%i] = %e\n",
-i, vresult_d[i], i, expected_vresult_d[i]);
-#else
-;
-#endif
-  }
+  /* This test will generate a "note" to the user that the argument is
+ subnormal.  It is not an error, but results are not defined.  Because this
+ is undefined, we cannot check that any value is correct.  Just store it in
+ a volatile variable so the XXSPLTIDP instruction gets generated and the
+ warning message printed. */
+  vresult_d_undefined = vec_splatid (6.6E-42f);
 
   /* Vector splat immediate */
   vsrc_a_int = (vector int) { 2, 3, 4, 5 };
-- 
2.31.1


-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797


Repost: [PATCH] Deal with prefixed loads/stores in tests, PR testsuite/100166

2021-07-07 Thread Michael Meissner via Gcc-patches
[PATCH] Deal with prefixed loads/stores in tests, PR testsuite/100166

This patch updates the various tests in the testsuite to treat plxv
and pstxv as being vector loads/stores.  This shows up if you run the
testsuite with a compiler configured with the option: --with-cpu=power10.

I have verified that these tests now all pass when I build and test a compiler
on a power10 system using --with-cpu=power10.  I have verified that they
continue to run on power9 little endian and power8 big endian systems.

Can I check this into the master branch?

2021-07-07  Michael Meissner  

gcc/testsuite/
PR testsuite/100166
* gcc.dg/vect/costmodel/ppc/costmodel-bb-slp-9a-pr63175.c:
* gcc.target/powerpc/fold-vec-load-builtin_vec_xl-char.c:
* gcc.target/powerpc/fold-vec-load-builtin_vec_xl-double.c:
* gcc.target/powerpc/fold-vec-load-builtin_vec_xl-float.c:
* gcc.target/powerpc/fold-vec-load-builtin_vec_xl-int.c:
* gcc.target/powerpc/fold-vec-load-builtin_vec_xl-longlong.c:
* gcc.target/powerpc/fold-vec-load-builtin_vec_xl-short.c:
* gcc.target/powerpc/fold-vec-load-vec_vsx_ld-char.c:
* gcc.target/powerpc/fold-vec-load-vec_vsx_ld-double.c:
* gcc.target/powerpc/fold-vec-load-vec_vsx_ld-float.c:
* gcc.target/powerpc/fold-vec-load-vec_vsx_ld-int.c:
* gcc.target/powerpc/fold-vec-load-vec_vsx_ld-longlong.c:
* gcc.target/powerpc/fold-vec-load-vec_vsx_ld-short.c:
* gcc.target/powerpc/fold-vec-load-vec_xl-char.c:
* gcc.target/powerpc/fold-vec-load-vec_xl-double.c:
* gcc.target/powerpc/fold-vec-load-vec_xl-float.c:
* gcc.target/powerpc/fold-vec-load-vec_xl-int.c:
* gcc.target/powerpc/fold-vec-load-vec_xl-longlong.c:
* gcc.target/powerpc/fold-vec-load-vec_xl-short.c:
* gcc.target/powerpc/fold-vec-splat-floatdouble.c:
* gcc.target/powerpc/fold-vec-splat-longlong.c:
* gcc.target/powerpc/fold-vec-store-builtin_vec_xst-char.c:
* gcc.target/powerpc/fold-vec-store-builtin_vec_xst-double.c:
* gcc.target/powerpc/fold-vec-store-builtin_vec_xst-float.c:
* gcc.target/powerpc/fold-vec-store-builtin_vec_xst-int.c:
* gcc.target/powerpc/fold-vec-store-builtin_vec_xst-longlong.c:
* gcc.target/powerpc/fold-vec-store-builtin_vec_xst-short.c:
* gcc.target/powerpc/fold-vec-store-vec_vsx_st-char.c:
* gcc.target/powerpc/fold-vec-store-vec_vsx_st-double.c:
* gcc.target/powerpc/fold-vec-store-vec_vsx_st-float.c:
* gcc.target/powerpc/fold-vec-store-vec_vsx_st-int.c:
* gcc.target/powerpc/fold-vec-store-vec_vsx_st-longlong.c:
* gcc.target/powerpc/fold-vec-store-vec_vsx_st-short.c:
* gcc.target/powerpc/fold-vec-store-vec_xst-char.c:
* gcc.target/powerpc/fold-vec-store-vec_xst-double.c:
* gcc.target/powerpc/fold-vec-store-vec_xst-float.c:
* gcc.target/powerpc/fold-vec-store-vec_xst-int.c:
* gcc.target/powerpc/fold-vec-store-vec_xst-longlong.c:
* gcc.target/powerpc/fold-vec-store-vec_xst-short.c:
* gcc.target/powerpc/lvsl-lvsr.c:
* gcc.target/powerpc/pr86731-fwrapv-longlong.c:
Update insn counts to account for power10 prefixed loads and
stores.
---
 .../vect/costmodel/ppc/costmodel-bb-slp-9a-pr63175.c   | 2 +-
 .../gcc.target/powerpc/fold-vec-load-builtin_vec_xl-char.c | 2 +-
 .../powerpc/fold-vec-load-builtin_vec_xl-double.c  | 2 +-
 .../powerpc/fold-vec-load-builtin_vec_xl-float.c   | 2 +-
 .../gcc.target/powerpc/fold-vec-load-builtin_vec_xl-int.c  | 2 +-
 .../powerpc/fold-vec-load-builtin_vec_xl-longlong.c| 2 +-
 .../powerpc/fold-vec-load-builtin_vec_xl-short.c   | 2 +-
 .../gcc.target/powerpc/fold-vec-load-vec_vsx_ld-char.c | 2 +-
 .../gcc.target/powerpc/fold-vec-load-vec_vsx_ld-double.c   | 2 +-
 .../gcc.target/powerpc/fold-vec-load-vec_vsx_ld-float.c| 2 +-
 .../gcc.target/powerpc/fold-vec-load-vec_vsx_ld-int.c  | 2 +-
 .../gcc.target/powerpc/fold-vec-load-vec_vsx_ld-longlong.c | 2 +-
 .../gcc.target/powerpc/fold-vec-load-vec_vsx_ld-short.c| 2 +-
 .../gcc.target/powerpc/fold-vec-load-vec_xl-char.c | 2 +-
 .../gcc.target/powerpc/fold-vec-load-vec_xl-double.c   | 2 +-
 .../gcc.target/powerpc/fold-vec-load-vec_xl-float.c| 2 +-
 .../gcc.target/powerpc/fold-vec-load-vec_xl-int.c  | 2 +-
 .../gcc.target/powerpc/fold-vec-load-vec_xl-longlong.c | 2 +-
 .../gcc.target/powerpc/fold-vec-load-vec_xl-short.c| 2 +-
 .../gcc.target/powerpc/fold-vec-splat-floatdouble.c| 7 ---
 gcc/testsuite/gcc.target/powerpc/fold-vec-splat-longlong.c | 2 +-
 .../powerpc/fold-vec-store-builtin_vec_xst-char.c  | 2 +-
 .../powerpc/fold-vec-store-builtin_vec_xst-double.c| 2 +-
 .../powerpc/fold-vec-store-builtin_vec_xst-float.c | 2 +-
 .../powerpc/fold-vec-store-builtin_vec_xst-int.c   

Repost: [PATCH] PR 100167: Fix vector long long multiply/divide tests on power10

2021-07-07 Thread Michael Meissner via Gcc-patches
[PATCH] PR 100167: Fix vector long long multiply/divide tests on power10.

This patch updates the vector long long multiply and divide tests to
supply the correct code information if power10 code generation is used.

2021-07-07  Michael Meissner  

gcc/testsuite/
PR testsuite/100167
* gcc.target/powerpc/fold-vec-div-longlong.c:
* gcc.target/powerpc/fold-vec-mult-longlong.c: Fix expected code
generation on power10.
---
 gcc/testsuite/gcc.target/powerpc/fold-vec-div-longlong.c  | 7 +--
 gcc/testsuite/gcc.target/powerpc/fold-vec-mult-longlong.c | 3 ++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-div-longlong.c 
b/gcc/testsuite/gcc.target/powerpc/fold-vec-div-longlong.c
index 312e984d3cc..f6a9b290ae5 100644
--- a/gcc/testsuite/gcc.target/powerpc/fold-vec-div-longlong.c
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-div-longlong.c
@@ -19,5 +19,8 @@ test6 (vector unsigned long long x, vector unsigned long long 
y)
 {
   return vec_div (x, y);
 }
-/* { dg-final { scan-assembler-times {\mdivd\M} 2 } } */
-/* { dg-final { scan-assembler-times {\mdivdu\M} 2 } } */
+
+/* { dg-final { scan-assembler-times {\mdivd\M}   2 { target { ! 
has_arch_pwr10 } } } } */
+/* { dg-final { scan-assembler-times {\mdivdu\M}  2 { target { ! 
has_arch_pwr10 } } } } */
+/* { dg-final { scan-assembler-times {\mvdivsd\M} 1 { target {   
has_arch_pwr10 } } } } */
+/* { dg-final { scan-assembler-times {\mvdivud\M} 1 { target {   
has_arch_pwr10 } } } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-mult-longlong.c 
b/gcc/testsuite/gcc.target/powerpc/fold-vec-mult-longlong.c
index 38dba9f5023..bd210e34801 100644
--- a/gcc/testsuite/gcc.target/powerpc/fold-vec-mult-longlong.c
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-mult-longlong.c
@@ -20,5 +20,6 @@ test6 (vector unsigned long long x, vector unsigned long long 
y)
   return vec_mul (x, y);
 }
 
-/* { dg-final { scan-assembler-times "\[ \t\]mulld " 4 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {\mmulld\M}  4 { target { lp64 && { ! 
has_arch_pwr10 } } } } } */
+/* { dg-final { scan-assembler-times {\mvmulld\M} 2 { target { has_arch_pwr10  
   } } } } */
 
-- 
2.31.1


-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797


Repost: [PATCH] PR 100170: Fix eq/ne tests on power10.

2021-07-07 Thread Michael Meissner via Gcc-patches
[PATCH] PR 100170: Fix eq/ne tests on power10.

This patch updates eq/ne tests in the testsuite to adjust the test if
power10 code generation is used.

I have verified that these tests run on a power10 system using the
--with-cpu=power10 configuration option, and they continue to run on power9
little endian and power8 big endian systems.

Can I check this patch into the master branch?

2021-07-07  Michael Meissner  

gcc/testsuite/
PR testsuite/100170
* gcc.target/powerpc/ppc-eq0-1.c: Add support for the setbc
instruction.
* gcc.target/powerpc/ppc-ne0-1.c: Update instruction counts on
power10.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797


Repost: [PATCH] PR 100168: Fix call test on power10.

2021-07-07 Thread Michael Meissner via Gcc-patches
[PATCH] PR 100168: Fix call test on power10.

Fix a test that was checking for 64-bit TOC calls, to also allow for
PC-relative calls.

I have verified that this test passes when run on a power10 system configured
with --with-cpu=power10 and it continues to pass on power9 little endian and
power8 big endian systems.

Can I check this into the master branch?

2021-07-07  Michael Meissner  

gcc/testsuite/
PR testsuite/100168
* gcc.dg/pr56727-2.c: Add support for PC-relative calls.
---
 gcc/testsuite/gcc.dg/pr56727-2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/pr56727-2.c b/gcc/testsuite/gcc.dg/pr56727-2.c
index c54369ed25e..77fdf4bc350 100644
--- a/gcc/testsuite/gcc.dg/pr56727-2.c
+++ b/gcc/testsuite/gcc.dg/pr56727-2.c
@@ -18,4 +18,4 @@ void h ()
 
 /* { dg-final { scan-assembler "@(PLT|plt)" { target i?86-*-* x86_64-*-* } } } 
*/
 /* { dg-final { scan-assembler "@(PLT|plt)" { target { powerpc*-*-linux* && 
ilp32 } } } } */
-/* { dg-final { scan-assembler "bl f\n\\s*nop" { target { powerpc*-*-linux* && 
lp64 } } } } */
+/* { dg-final { scan-assembler "(bl f\n\\s*nop)|(bl f@notoc)" { target { 
powerpc*-*-linux* && lp64 } } } } */
-- 
2.31.1


-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797


Re: PING 2 [PATCH] correct handling of variable offset minus constant in -Warray-bounds (PR 100137)

2021-07-07 Thread Martin Sebor via Gcc-patches

On 7/7/21 1:38 AM, Richard Biener wrote:

On Tue, Jul 6, 2021 at 5:47 PM Martin Sebor via Gcc-patches
 wrote:


Ping: https://gcc.gnu.org/pipermail/gcc-patches/2021-June/573349.html


+  if (TREE_CODE (axstype) != UNION_TYPE)

what about QUAL_UNION_TYPE?  (why constrain union type accesses
here - note you don't seem to constrain accesses of union members here)


I didn't know a QUAL_UNION_TYPE was a thing.  Removing the test
doesn't seem to cause any regressions so let me do that in a followup.



+if (tree access_size = TYPE_SIZE_UNIT (axstype))

+  /* The byte size of the array has already been determined above
+ based on a pointer ARG.  Set ELTSIZE to the size of the type
+ it points to and REFTYPE to the array with the size, rounded
+ down as necessary.  */
+  if (POINTER_TYPE_P (reftype))
+reftype = TREE_TYPE (reftype);
+  if (TREE_CODE (reftype) == ARRAY_TYPE)
+reftype = TREE_TYPE (reftype);
+  if (tree refsize = TYPE_SIZE_UNIT (reftype))
+if (TREE_CODE (refsize) == INTEGER_CST)
+  eltsize = wi::to_offset (refsize);

probably pre-existing but the pointer indirection is definitely confusing
me again and again given the variable is named 'reftype' - obviously
an access to a pointer does not have any element size.  Possibly the
paths arriving here ensure somehow that the only case is when
reftype is not the access type but a pointer to the accessed memory.
"jump-threading" the source might help me avoid tripping over this
again and again ...


I agree (it is confusing).  There's more to simplify here.  It's on
my to do list so let me see about this piece of code then.



The patch removes a lot of odd code, I like that.  You know this code best
and it's hard to spot errors.

So OK, you'll deal with the fallout.


I certainly will.  Pushed in r12-2132.

Thanks
Martin



Thanks,
Richard.


On 6/28/21 1:33 PM, Martin Sebor wrote:

Ping: https://gcc.gnu.org/pipermail/gcc-patches/2021-June/573349.html

On 6/21/21 4:25 PM, Martin Sebor wrote:

-Warray-bounds relies on similar logic as -Wstringop-overflow et al.,
but using its own algorithm, including its own bugs such as PR 100137.
The attached patch takes the first step toward unifying the logic
between the warnings.  It changes a subset of -Warray-bounds to call
compute_objsize() to detect out-of-bounds indices.  Besides fixing
the bug this also nicely simplifies the code and improves
the consistency between the informational messages printed by both
classes of warnings.

The changes to the test suite are extensive mainly because of
the different format of the diagnostics resulting from slightly
tighter bounds of offsets computed by the new algorithm, and in
smaller part because the change lets -Warray-bounds diagnose some
problems it previously missed due to the limitations of its own
solution.

The false positive reported in PR 100137 is a 10/11/12 regression
but this change is too intrusive to backport.  I have a smaller
and more targeted patch I plan to backport in its stead.

Tested on x86_64-linux.

Martin








Re: [PATCH] c++: Fix PR101247 in another way

2021-07-07 Thread Jason Merrill via Gcc-patches

On 7/7/21 11:29 AM, Patrick Palka wrote:

r12-1989 fixed the testcase in the PR, but unfortunately the fix is
buggy:

   1. It breaks the case where the common template between the
  TEMPLATE_DECL t and ctx_parms is the innermost template (as in
  concepts-memtmpl5.C below).  This can be fixed by instead
  passing the TREE_TYPE of ctmpl to common_enclosing_class when
  ctmpl is a class template.
   2. Even if that's fixed, the analogous case where the innermost
  template is a partial specialization is still broken (as in
  concepts-memtmpl5a.C below), because ctmpl is always the primary
  template.

So this patch instead fixes the general problem in a simpler way that
doesn't rely on ctx_parms at all: when looking for the template
parameters of a TEMPLATE_DECL that are shared with the current template
context, just walk its DECL_CONTEXT.  As long as the template is not
overly general (e.g. we didn't pass it through most_general_template),
this should give us exactly what we want, since if a TEMPLATE_DECL can
be referred to from some template context then the template parameters
it uses must all be in-scope and represented in its DECL_CONTEXT.  This
effectively makes us treat TEMPLATE_DECLs more similarly to other _DECLs
(whose DECL_CONTEXT we also walk).

Bootstrapped and regtested on x86_64-pc-linux-gnu, also tested on
cmcstl2 and range-v3, does this look OK for trunk/11?


OK.


PR c++/101247

gcc/cp/ChangeLog:

* pt.c (any_template_parm_r) : Just walk the
DECL_CONTEXT.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/concepts-memtmpl4.C: Uncomment the commented out
example, which we now handle correctly.
* g++.dg/cpp2a/concepts-memtmpl5.C: New test.
* g++.dg/cpp2a/concepts-memtmpl5a.C: New test.
---
  gcc/cp/pt.c | 14 +-
  gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl4.C  |  2 +-
  gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl5.C  | 11 +++
  gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl5a.C | 15 +++
  4 files changed, 32 insertions(+), 10 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl5.C
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl5a.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 7e56ccfc45f..dc0f0b7b58e 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -10728,15 +10728,11 @@ any_template_parm_r (tree t, void *data)
break;
  
  case TEMPLATE_DECL:

-  {
-   /* If T is a member template that shares template parameters with
-  ctx_parms, we need to mark all those parameters for mapping.  */
-   if (tree ctmpl = TREE_TYPE (INNERMOST_TEMPLATE_PARMS (ftpi->ctx_parms)))
- if (tree com = common_enclosing_class (DECL_CONTEXT (t),
-DECL_CONTEXT (ctmpl)))
-   if (tree ti = CLASSTYPE_TEMPLATE_INFO (com))
- WALK_SUBTREE (TI_ARGS (ti));
-  }
+  /* If T is a member template that shares template parameters with
+ctx_parms, we need to mark all those parameters for mapping.
+To that end, it should suffice to just walk the DECL_CONTEXT of
+the template (assuming the template is not overly general).  */
+  WALK_SUBTREE (DECL_CONTEXT (t));
break;
  
  case LAMBDA_EXPR:

diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl4.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl4.C
index 625149e5025..f990ae17859 100644
--- a/gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl4.C
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl4.C
@@ -24,5 +24,5 @@ int main()
  {
A::B::f(0);
A::C::f(0);
-  // A::C::g();
+  A::C::g();
  }
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl5.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl5.C
new file mode 100644
index 000..3c83bb88485
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl5.C
@@ -0,0 +1,11 @@
+// PR c++/101247
+// { dg-do compile { target concepts } }
+
+template struct A {
+  template static constexpr bool d = true;
+  static void g() requires d;
+};
+
+int main() {
+  A::g();
+}
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl5a.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl5a.C
new file mode 100644
index 000..458f1cdf856
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-memtmpl5a.C
@@ -0,0 +1,15 @@
+// PR c++/101247
+// { dg-do compile { target concepts } }
+// A variant of concepts-memtmpl5.C that uses a partial specialization
+// of A instead of the primary template.
+
+template struct A;
+
+template requires true struct A {
+  template static constexpr bool d = true;
+  static void g() requires d;
+};
+
+int main() {
+  A::g();
+}





Re: [PATCH v2] IBM Z: Use @PLT symbols for local functions in 64-bit mode

2021-07-07 Thread Ilya Leoshkevich via Gcc-patches
On Wed, 2021-07-07 at 21:03 +0200, Ilya Leoshkevich wrote:
> Bootstrapped and regtested on s390x-redhat-linux.  Ok for master?
> 
> v1: https://gcc.gnu.org/pipermail/gcc-patches/2021-June/573614.html
> v1 -> v2: Do not use UNSPEC_PLT in 64-bit code and rename it to
>   UNSPEC_PLT31 (Ulrich, Andreas).  Do not append @PLT only to
>   weak symbols in non-PIC code (Ulrich).  Add TLS tests.
> 
> 
> 
> This helps with generating code for kernel hotpatches, which contain
> individual functions and are loaded more than 2G away from vmlinux.
> This should not create performance regressions for the normal use
> cases, because for local functions ld replaces @PLT calls with direct
> calls.

Please disregard this patch, I just realized I missed two
output_asm_insn () calls in s390.c: one in function_profiler () and
one in s390_output_mi_thunk ().  I'll send a v3.



[Patch] MAINTAINERS - Add myself for write after approval and DCO

2021-07-07 Thread Gaius Mulley via Gcc-patches


Hello, I have added myself for Write After Approval and DCO.

Thanks Gaius

=

/
* MAINTAINERS: Add myself for write after approval and DCO.

2021-07-06  Gaius Mulley  

Signed-off-by: Gaius Mulley 


diff --git a/MAINTAINERS b/MAINTAINERS
index 4d6ac9c5765..48cfa3fda1d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -537,6 +537,7 @@ James A. Morrison   

 Brooks Moses   
 Dirk Mueller   
 Phil Muldoon   
+Gaius Mulley   
 Steven Munroe  
 Jun Ma 
 Szabolcs Nagy  
@@ -703,3 +704,4 @@ information.
  Matthias Kretz
  Jeff Law  
  Jeff Law  
+ Gaius Mulley  


=

regards,
Gaius


Re: [PATCH] Add gnu::diagnose_as attribute

2021-07-07 Thread Jason Merrill via Gcc-patches

On 7/5/21 10:18 AM, Matthias Kretz wrote:

On Thursday, 1 July 2021 17:18:26 CEST Jason Merrill wrote:

You probably want to adjust is_late_template_attribute to change that.


Right, I hacked is_late_template_attribute but now I only see a TYPE_DECL
passed to my attribute handler (!DECL_ALIAS_TEMPLATE_P). I.e. I don't know how
your previous comment is supposed to help me:

On Tuesday, 22 June 2021 22:12:42 CEST Jason Merrill wrote:

Yes.  You can check that with get_underlying_template.


FWIW, I don't feel qualified to implement the diagnose_as attribute on alias
templates. The trees I've seen while testing the following test case don't
make sense to me. :(


// { dg-do compile { target c++11 } }
// { dg-options "-fdiagnostics-use-aliases -fpretty-templates" }

template  class A0 {};
template  using B0 [[gnu::diagnose_as]] = A0; // #1
template  using C0 [[gnu::diagnose_as]] = A0; // #2

template  class A1 {};
template  class A1 {};
template  using B1 [[gnu::diagnose_as]] = A1; // #3

void fn_1(int);

int main ()
{
   fn_1 (A0 ()); // { dg-error "cannot convert 'B0' to 'int'" }
   fn_1 (A1 ()); // { dg-error "cannot convert 'A1' to 'int'" }
   fn_1 (A1 ()); // { dg-error "cannot convert 'B1' to 'int'" }
}

On #1 I see !COMPLETE_TYPE_P (TREE_TYPE (*node))


Yes; a use that matches the primary template but is not the primary 
template gets a separate dependent type.



while on #3 TREE_TYPE (*node) is a complete type.


Indeed, we don't do the same thing for partial specializations.


Like I said, I don't get to see the TEMPLATE_DECL of
either #1, #2, or #3, only a TYPE_DECL whose TREE_TYPE is A0. I thus have no
idea how to reject #2.


The difference between #1 and #2 is that same_type_p (#1, 
CLASSTYPE_PRIMARY_TEMPLATE_TYPE (TREE_TYPE (#1)) is true; see the use of 
that in resolve_typename_type.


Jason



Re: Repost: [PATCH] Generate 128-bit int divide/modulus on power10.

2021-07-07 Thread Segher Boessenkool
On Wed, Jul 07, 2021 at 03:55:44PM -0400, Michael Meissner wrote:
> This patch adds support for the VDIVSQ, VDIVUQ, VMODSQ, and VMODUQ
> instructions to do 128-bit arithmetic.

> gcc/
>   PR target/100809
>   * config/rs6000/rs6000.md (udivti3): New insn.
>   (divti3): New insn.
>   (umodti3): New insn.
>   (modti3): New insn.
> 
> gcc/testsuite/
>   PR target/100809
>   * gcc.target/powerpc/p10-vdivq-vmodq.c: New test.

>  2 files changed, 61 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
> 
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index e84d0311cc2..4e53cf28dde 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -3234,6 +3234,14 @@ (define_insn "udiv3"
>[(set_attr "type" "div")
> (set_attr "size" "")])
>  
> +(define_insn "udivti3"
> +  [(set (match_operand:TI 0 "altivec_register_operand" "=v")
> +(udiv:TI (match_operand:TI 1 "altivec_register_operand" "v")

(every eight leading spaces should be tabs -- multiple times here)

> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
> @@ -0,0 +1,27 @@
> +/* { dg-require-effective-target lp64 } */

int128, instead.  There is nothing here that needs lp64.  If there was,
that should be commented here, too.

Okay for trunk with those fixes.  Also okay for all backports.  Please
make sure it is tested on all usual platforms before backporting.  Thanks!


Segher


Re: [PATCH] docs: Add 'S' to Machine Constraints for RISC-V

2021-07-07 Thread Fangrui Song

On 2021-07-02, Kito Cheng wrote:

It was undocumented before, but is already used in the Linux kernel, so the
LLVM community suggested we document it, so that it becomes a
supported/documented/non-internal machine constraint.

gcc/ChangeLog:

PR target/101275
* doc/md.texi (Machine Constraints): Document the 'S' constraints
for RISC-V.
---
gcc/doc/md.texi | 3 +++
1 file changed, 3 insertions(+)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 00caf3844cc..b776623e8a5 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3536,6 +3536,9 @@ A 5-bit unsigned immediate for CSR access instructions.
@item A
An address that is held in a general-purpose register.

+@item S
+A constant call address.
+
@end table

@item RX---@file{config/rx/constraints.md}
--
2.31.1



Ping on this:)

'S' can be used on either a variable or a function. Does "A constant call 
address." need change?


[committed] analyzer: remove add_any_constraints_from_ssa_def_stmt

2021-07-07 Thread David Malcolm via Gcc-patches
I'm working on reimplementing -Wanalyzer-use-of-uninitialized-value, but
I ran into issues with
region_model::add_any_constraints_from_ssa_def_stmt.
This function is from the initial commit of the analyzer and walks the
SSA names finding conditions that were missed due to the GCC 10 era
region_model not retaining useful information on how values were
created; as of GCC 11 the symbolic values contain this information,
and so the conditions can be reconstructed from them instead.

region_model::add_any_constraints_from_ssa_def_stmt is a liability
when tracking uninitialized values as it requires looking up SSA
values when those values may have been purged, thus greatly complicating
detection of uses of uninitialized values.

It's simplest to eliminate it and reimplement the condition-finding
via the makeup of the svalues, which this patch does.  Doing so requires
supporting add_condition on svalues rather than just on trees, which
requires some changes to ana::state_machine and its subclasses.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as 48e8a7a677b8356df946cd12fbb215538828e747.

gcc/analyzer/ChangeLog:
* diagnostic-manager.cc (null_assignment_sm_context::get_state):
New overload.
(null_assignment_sm_context::set_next_state): New overload.
(null_assignment_sm_context::get_diagnostic_tree): New.
* engine.cc (impl_sm_context::get_state): New overload.
(impl_sm_context::set_next_state): New overload.
(impl_sm_context::get_diagnostic_tree): New overload.
(impl_region_model_context::on_condition): Convert params from
tree to const svalue *.
* exploded-graph.h (impl_region_model_context::on_condition):
Likewise.
* region-model.cc (region_model::on_call_pre): Move handling of
internal calls to before checking for get_fndecl_for_call.
(region_model::add_constraints_from_binop): New.
(region_model::add_constraint): Split out into a new overload
working on const svalue * rather than tree.  Call
add_constraints_from_binop.  Drop call to
add_any_constraints_from_ssa_def_stmt.
(region_model::add_any_constraints_from_ssa_def_stmt): Delete.
(region_model::add_any_constraints_from_gassign): Delete.
(region_model::add_any_constraints_from_gcall): Delete.
* region-model.h
(region_model::add_any_constraints_from_ssa_def_stmt): Delete.
(region_model::add_any_constraints_from_gassign): Delete.
(region_model::add_any_constraints_from_gcall): Delete.
(region_model::add_constraint): Add overload decl.
(region_model::add_constraints_from_binop): New decl.
(region_model_context::on_condition): Convert params from tree to
const svalue *.
(noop_region_model_context::on_condition): Likewise.
* sm-file.cc (fileptr_state_machine::condition): Likewise.
* sm-malloc.cc (malloc_state_machine::on_condition): Likewise.
* sm-pattern-test.cc: Include tristate.h, selftest.h,
analyzer/call-string.h, analyzer/program-point.h,
analyzer/store.h, and analyzer/region-model.h.
(pattern_test_state_machine::on_condition): Convert params from tree to
const svalue *.
* sm-sensitive.cc (sensitive_state_machine::on_condition): Delete.
* sm-signal.cc (signal_state_machine::on_condition): Delete.
* sm-taint.cc (taint_state_machine::on_condition): Convert params
from tree to const svalue *.
* sm.cc: Include tristate.h, selftest.h, analyzer/call-string.h,
analyzer/program-point.h, analyzer/store.h, and
analyzer/region-model.h.
(any_pointer_p): Add overload taking const svalue *sval.
* sm.h (any_pointer_p): Add overload taking const svalue *sval.
(state_machine::on_condition): Convert params from tree to
const svalue *.  Provide no-op default implementation.
(sm_context::get_state): Add overload taking const svalue *sval.
(sm_context::set_next_state): Likewise.
(sm_context::on_transition): Likewise.
(sm_context::get_diagnostic_tree): Likewise.
* svalue.cc (svalue::all_zeroes_p): New.
(constant_svalue::all_zeroes_p): New.
(repeated_svalue::all_zeroes_p): Convert to vfunc.
* svalue.h (svalue::all_zeroes_p): New decl.
(constant_svalue::all_zeroes_p): New decl.
(repeated_svalue::all_zeroes_p): Convert decl to vfunc.

gcc/testsuite/ChangeLog:
* gcc.dg/analyzer/pattern-test-2.c: Update expected results.
* gcc.dg/plugin/analyzer_gil_plugin.c
(gil_state_machine::on_condition): Remove.

Signed-off-by: David Malcolm 
---
 gcc/analyzer/diagnostic-manager.cc|  35 ++
 gcc/analyzer/engine.cc|  54 +++-
 gcc/analyzer/exploded-graph.h |   4 +-
 gcc/analyzer/region-model.cc  | 304 +-

Re: [PATCH 1/2] CALL_INSN may not be a real function call.

2021-07-07 Thread Segher Boessenkool
On Wed, Jul 07, 2021 at 11:23:48PM +0800, Hongtao Liu wrote:
> On Wed, Jul 7, 2021 at 10:54 PM Segher Boessenkool
>  wrote:

[ snip some old stuff ]

> > Yeah.  This stuff needs a rethink.
> >
> > What is wrong with just using an unspec and clobbers?
> >
> It's partial and **potential clobber**,

All RTL "clobber" is always a potential clobber, it never guarantees the
existing value does not survive.  You can pass it through some unspec to
make this more explicit.  You will have to add some hook that CSE can
use to figure out what bits are conserved by your target-specific
construct, as you should have done in the first place.  This will be
much less work for you too, compared to actually checking if all
existing GCC code needs to check "FAKE_CALL_P" or not (instead of just
hoping it works now, as you do).


Segher


Re: [PATCH 1/2] CALL_INSN may not be a real function call.

2021-07-07 Thread Segher Boessenkool
On Wed, Jul 07, 2021 at 11:32:59PM +0800, Hongtao Liu wrote:
> On Wed, Jul 7, 2021 at 10:54 PM Segher Boessenkool
>  wrote:
> > So, a "FAKE_CALL" is very much a *real* call, on the RTL level, which is
> > where we are here.  But you want it to be treated differently because it
> > will eventually be replaced by different insns.
> It's CALL_INSN on the rtl level,  but it's just a normal instruction
> that it doesn't have a call stack, and it doesn't affect the control
> flow

There is no such thing as "call stack" (whatever that may mean) to do
with the RTL "call" insn.  How the return address is stored (if at all)
is up to the target.  Many do not store the return address on the stack
(for example they have an RA or LR register for it).  Those that do
store it on a stack do not all change the stack pointer.

In RTL, it *does* change the control flow.  If you don't like that,
don't use a "call" insn.  You will have to update a *lot* more code
than you did, otherwise.

> > So because of this one thing (you need to insert partial clobbers) you
> > force all kinds of unrelated code to have changes, namely, code that
> > needs to do something with calls, but now you do not want to have that
> > done on some calls because you promise that call will disappear
> > eventually, and it cannot cause any problems in the mean time?
> >
> > I am not convinced.  This is not design, this is a terrible hack, this
> > is the opposite direction we should go in.
> 
> Quote from  https://gcc.gnu.org/pipermail/gcc-patches/2021-May/570634.html
> 
> > Also i grep CALL_P or CALL_INSN in GCC source codes, there are many
> > places which hold the assumption CALL_P/CALL_INSN is a real call.
> > Considering that vzeroupper is used a lot on the i386 backend, I'm a
> > bit worried that this implementation solution will be a bottomless
> > pit.
> 
> Maybe, but I think the same is true for CLOBBER_HIGH.  If we have
> a third alternative then we should consider it, but I think the
> call approach is still going to be less problematic than CLOBBER_HIGH.
> 
> The main advantage of the call approach is that the CALL_P handling
> is (mostly) conservatively correct and performance problems are just
> a one-line change.  The CLOBBER_HIGH approach instead requires
> changes to the way that passes track liveness information for
> non-call instructions (so is much more than a one-line change).
> Also, treating a CLOBBER_HIGH like a CLOBBER isn't conservatively
> correct, because other code might be relying on part of the register
> being preserved.

And this isn't a one-line change either, and it is only partial already,
and we don't know how deep the rabbit hole goes.


Segher


Re: [PATCH] rs6000: Support [u]mod3 for vector modulo insns

2021-07-07 Thread Kewen.Lin via Gcc-patches
Hi Segher,

on 2021/7/8 上午1:10, Segher Boessenkool wrote:
> Hi!
> 
> On Wed, Jul 07, 2021 at 05:03:23PM +0800, Kewen.Lin wrote:
>> This patch is to make Power10 newly introduced vector
>> modulo instructions exploited in vectorized loops, it
>> just simply renames existing define_insns as standard
>> pattern names.
> 
>>
>> Is it ok for trunk?
>>
>> BR,
>> Kewen
>> -
>> gcc/ChangeLog:
>>
>>  * config/rs6000/rs6000-builtin.def (MODS_V2DI, MODS_V4SI, MODU_V2DI,
>>  MODU_V4SI): Adjust.
>>  * config/rs6000/vsx.md (mods_): Renamed to...
>>  (mod3): ... this.
>>  (modu_): Renamed to...
>>  (umod3): ... this.
> 
> ("Rename", not "Renamed")
> 

oh, forgot to use imperative form, will fix.

> (It reads better if you put the "Adjust" entry after the rest, btw)
> 

Will switch them.  :)

> So I suppose the new testcase FAILs without these changes?
> 

Yes, it fails without these changes.

> Okay for trunk.  Thanks!
> 

Thanks for the reviews!

BR,
Kewen


Re: [PATCH] docs: Add 'S' to Machine Constraints for RISC-V

2021-07-07 Thread Palmer Dabbelt

On Wed, 07 Jul 2021 16:26:25 PDT (-0700), i...@maskray.me wrote:

On 2021-07-02, Kito Cheng wrote:

It was undocumented before, but is already used in the Linux kernel, so the
LLVM community suggested we document it, so that it becomes a
supported/documented/non-internal machine constraint.

gcc/ChangeLog:

PR target/101275
* doc/md.texi (Machine Constraints): Document the 'S' constraints
for RISC-V.
---
gcc/doc/md.texi | 3 +++
1 file changed, 3 insertions(+)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 00caf3844cc..b776623e8a5 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3536,6 +3536,9 @@ A 5-bit unsigned immediate for CSR access instructions.
@item A
An address that is held in a general-purpose register.

+@item S
+A constant call address.
+
@end table

@item RX---@file{config/rx/constraints.md}
--
2.31.1



Ping on this:)


Thanks, I hadn't had time to look at this but it was on the list.


'S' can be used on either a variable or a function. Does "A constant call 
address." need change?


I think so: this can certainly be used to generate arbitrary constant 
addresses into assembly, but I don't remember if there was some reason 
it was listed as only applying to call addresses.  It certainly could 
just be a historical artifact, as those strings tend to bit rot when 
they're internal, but I wanted to at least take a look.


Also: IIUC we're supposed to remove the @internal when something is 
documented (though I remember having said that before, so I might be 
mistaken).


Re: [PATCH] Add gnu::diagnose_as attribute

2021-07-07 Thread Jason Merrill via Gcc-patches

On 7/7/21 4:23 AM, Matthias Kretz wrote:

On Tuesday, 22 June 2021 21:52:16 CEST Jason Merrill wrote:

2. About the namespace aliases: IIUC an attribute would currently be
rejected because of the C++ grammar. Do you want to make it valid before
WG21 officially decides how to proceed? And if you have a pointer for me
where I'd have to adjust the grammar rules, that'd help.


You will want to adjust cp_parser_namespace_alias_definition to handle
attributes like cp_parser_namespace_definition.  The latter currently
accepts attributes both before and after the name, which seems like a
good pattern to follow so it doesn't matter which WG21 chooses.
Probably best to pedwarn about C++11 attributes in both locations for
now, not just after.


This introduces an ambiguity in cp_parser_declaration. The function has to
decide whether to call cp_parser_namespace_definition or fall back to
cp_parser_block_declaration (which calls
cp_parser_namespace_alias_definition). But now the parser has to look ahead a
lot farther:

namespace foo [[whatever]] {}
namespace bar [[whatever]] = foo;

I.e. only at '{' vs. '=' can cp_parser_declaration decide to call
cp_parser_namespace_definition.

Consequently, should I really modify cp_parser_namespace_definition to handle
namespace aliases?


aliases can also appear at block scope, unlike namespace definitions, 
but you could factor out some of the alias handling to call from both 
places.



Or can/should cp_parser_declaration look ahead behind the
attribute(s)? How?


cp_parser_skip_attributes_opt

Jason



[PATCH] c++: Fix noexcept with unevaluated operand [PR101087]

2021-07-07 Thread Marek Polacek via Gcc-patches
It sounds plausible that this assert

  int f();
  static_assert(noexcept(sizeof(f())), "");

should pass: sizeof produces a std::size_t and its operand is not
evaluated, so it can't throw.  noexcept should only evaluate to
false for potentially evaluated operands.  Therefore I think that
check_noexcept_r shouldn't walk into operands of sizeof/decltype/
alignof/typeof.  Only checking cp_unevaluated_operand therein does
not work, because expr_noexcept_p can be called in an unevaluated
context, so I resorted to the following cp_evaluated hack.  Does
that seem acceptable?

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

PR c++/101087

gcc/cp/ChangeLog:

* except.c (check_noexcept_r): Don't walk into unevaluated
operands.
(expr_noexcept_p): Use cp_evaluated.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/noexcept70.C: New test.
---
 gcc/cp/except.c | 14 +++---
 gcc/testsuite/g++.dg/cpp0x/noexcept70.C |  5 +
 2 files changed, 16 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept70.C

diff --git a/gcc/cp/except.c b/gcc/cp/except.c
index a8cea53cf91..6f97ac40b4b 100644
--- a/gcc/cp/except.c
+++ b/gcc/cp/except.c
@@ -1033,12 +1033,15 @@ check_handlers (tree handlers)
  expression whose type is a polymorphic class type (10.3).  */
 
 static tree
-check_noexcept_r (tree *tp, int * /*walk_subtrees*/, void * /*data*/)
+check_noexcept_r (tree *tp, int *walk_subtrees, void *)
 {
   tree t = *tp;
   enum tree_code code = TREE_CODE (t);
-  if ((code == CALL_EXPR && CALL_EXPR_FN (t))
-  || code == AGGR_INIT_EXPR)
+
+  if (cp_unevaluated_operand)
+*walk_subtrees = false;
+  else if ((code == CALL_EXPR && CALL_EXPR_FN (t))
+  || code == AGGR_INIT_EXPR)
 {
   /* We can only use the exception specification of the called function
 for determining the value of a noexcept expression; we can't use
@@ -1155,6 +1158,11 @@ expr_noexcept_p (tree expr, tsubst_flags_t complain)
   if (expr == error_mark_node)
 return false;
 
+  /* Even though the operand of noexcept is an _unevaluated_ operand,
+ temporarily clearing cp_unevaluated_operand allows us to check it
+ in check_noexcept_r, to handle noexcept(sizeof(f())).  It could be
+ set when we are called in the context of synthesized_method_walk.  */
+  cp_evaluated ev;
   fn = cp_walk_tree_without_duplicates (&expr, check_noexcept_r, 0);
   if (fn)
 {
diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept70.C 
b/gcc/testsuite/g++.dg/cpp0x/noexcept70.C
new file mode 100644
index 000..45a6137dd6f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/noexcept70.C
@@ -0,0 +1,5 @@
+// PR c++/101087
+// { dg-do compile { target c++11 } }
+
+int f();
+static_assert(noexcept(sizeof(f())), "");

base-commit: a110855667782dac7b674d3e328b253b3b3c919b
-- 
2.31.1



Re: PING 2 [PATCH] correct handling of variable offset minus constant in -Warray-bounds (PR 100137)

2021-07-07 Thread Marek Polacek via Gcc-patches
On Wed, Jul 07, 2021 at 02:38:11PM -0600, Martin Sebor via Gcc-patches wrote:
> On 7/7/21 1:38 AM, Richard Biener wrote:
> > On Tue, Jul 6, 2021 at 5:47 PM Martin Sebor via Gcc-patches
> >  wrote:
> > > 
> > > Ping: https://gcc.gnu.org/pipermail/gcc-patches/2021-June/573349.html
> > 
> > +  if (TREE_CODE (axstype) != UNION_TYPE)
> > 
> > what about QUAL_UNION_TYPE?  (why constrain union type accesses
> > here - note you don't seem to constrain accesses of union members here)
> 
> I didn't know a QUAL_UNION_TYPE was a thing.  Removing the test
> doesn't seem to cause any regressions so let me do that in a followup.
> 
> > 
> > +if (tree access_size = TYPE_SIZE_UNIT (axstype))
> > 
> > +  /* The byte size of the array has already been determined above
> > + based on a pointer ARG.  Set ELTSIZE to the size of the type
> > + it points to and REFTYPE to the array with the size, rounded
> > + down as necessary.  */
> > +  if (POINTER_TYPE_P (reftype))
> > +reftype = TREE_TYPE (reftype);
> > +  if (TREE_CODE (reftype) == ARRAY_TYPE)
> > +reftype = TREE_TYPE (reftype);
> > +  if (tree refsize = TYPE_SIZE_UNIT (reftype))
> > +if (TREE_CODE (refsize) == INTEGER_CST)
> > +  eltsize = wi::to_offset (refsize);
> > 
> > probably pre-existing but the pointer indirection is definitely confusing
> > me again and again given the variable is named 'reftype' - obviously
> > an access to a pointer does not have any element size.  Possibly the
> > paths arriving here ensure somehow that the only case is when
> > reftype is not the access type but a pointer to the accessed memory.
> > "jump-threading" the source might help me avoiding to trip over this
> > again and again ...
> 
> I agree (it is confusing).  There's more to simplify here.  It's on
> my to do list so let me see about this piece of code then.
> 
> > 
> > The patch removes a lot of odd code, I like that.  You know this code best
> > and it's hard to spot errors.
> > 
> > So OK, you'll deal with the fallout.
> 
> I certainly will.  Pushed in r12-2132.

I think this patch breaks bootstrap on x86_64:

In member function ‘availability varpool_node::get_availability(symtab_node*)’,
inlined from ‘availability symtab_node::get_availability(symtab_node*)’ at 
/opt/notnfs/polacek/gcc/gcc/cgraph.h:3360:63,
inlined from ‘availability symtab_node::get_availability(symtab_node*)’ at 
/opt/notnfs/polacek/gcc/gcc/cgraph.h:3355:1,
inlined from ‘symtab_node* 
symtab_node::ultimate_alias_target(availability*, symtab_node*)’ at 
/opt/notnfs/polacek/gcc/gcc/cgraph.h:3199:35,
inlined from ‘symtab_node* 
symtab_node::ultimate_alias_target(availability*, symtab_node*)’ at 
/opt/notnfs/polacek/gcc/gcc/cgraph.h:3193:1,
inlined from ‘varpool_node* 
varpool_node::ultimate_alias_target(availability*, symtab_node*)’ at 
/opt/notnfs/polacek/gcc/gcc/cgraph.h:3234:5,
inlined from ‘availability 
varpool_node::_ZN12varpool_node16get_availabilityEP11symtab_node.part.0(symtab_node*)’
 at /opt/notnfs/polacek/gcc/gcc/varpool.c:501:29:
/opt/notnfs/polacek/gcc/gcc/varpool.c:490:19: error: array subscript 
‘varpool_node[0]’ is partly outside array bounds of ‘varpool_node [0]’ 
[-Werror=array-bounds]
  490 |   if (!definition && !in_other_partition)
  |   ^~
In file included from /opt/notnfs/polacek/gcc/gcc/varpool.c:29:
/opt/notnfs/polacek/gcc/gcc/cgraph.h: In member function ‘availability 
varpool_node::_ZN12varpool_node16get_availabilityEP11symtab_node.part.0(symtab_node*)’:
/opt/notnfs/polacek/gcc/gcc/cgraph.h:1969:39: note: object 
‘varpool_node::’ of size 120
 1969 | struct GTY((tag ("SYMTAB_VARIABLE"))) varpool_node : public symtab_node
  |   ^~~~
cc1plus: all warnings being treated as errors

Marek



Re: PING 2 [PATCH] correct handling of variable offset minus constant in -Warray-bounds (PR 100137)

2021-07-07 Thread Martin Sebor via Gcc-patches

On 7/7/21 7:48 PM, Marek Polacek wrote:

On Wed, Jul 07, 2021 at 02:38:11PM -0600, Martin Sebor via Gcc-patches wrote:

On 7/7/21 1:38 AM, Richard Biener wrote:

On Tue, Jul 6, 2021 at 5:47 PM Martin Sebor via Gcc-patches
 wrote:


Ping: https://gcc.gnu.org/pipermail/gcc-patches/2021-June/573349.html


+  if (TREE_CODE (axstype) != UNION_TYPE)

what about QUAL_UNION_TYPE?  (why constrain union type accesses
here - note you don't seem to constrain accesses of union members here)


I didn't know a QUAL_UNION_TYPE was a thing.  Removing the test
doesn't seem to cause any regressions so let me do that in a followup.



+if (tree access_size = TYPE_SIZE_UNIT (axstype))

+  /* The byte size of the array has already been determined above
+ based on a pointer ARG.  Set ELTSIZE to the size of the type
+ it points to and REFTYPE to the array with the size, rounded
+ down as necessary.  */
+  if (POINTER_TYPE_P (reftype))
+reftype = TREE_TYPE (reftype);
+  if (TREE_CODE (reftype) == ARRAY_TYPE)
+reftype = TREE_TYPE (reftype);
+  if (tree refsize = TYPE_SIZE_UNIT (reftype))
+if (TREE_CODE (refsize) == INTEGER_CST)
+  eltsize = wi::to_offset (refsize);

probably pre-existing but the pointer indirection is definitely confusing
me again and again given the variable is named 'reftype' - obviously
an access to a pointer does not have any element size.  Possibly the
paths arriving here ensure somehow that the only case is when
reftype is not the access type but a pointer to the accessed memory.
"jump-threading" the source might help me avoiding to trip over this
again and again ...


I agree (it is confusing).  There's more to simplify here.  It's on
my to do list so let me see about this piece of code then.



The patch removes a lot of odd code, I like that.  You know this code best
and it's hard to spot errors.

So OK, you'll deal with the fallout.


I certainly will.  Pushed in r12-2132.


I think this patch breaks bootstrap on x86_64:

In member function ‘availability varpool_node::get_availability(symtab_node*)’,
 inlined from ‘availability symtab_node::get_availability(symtab_node*)’ at 
/opt/notnfs/polacek/gcc/gcc/cgraph.h:3360:63,
 inlined from ‘availability symtab_node::get_availability(symtab_node*)’ at 
/opt/notnfs/polacek/gcc/gcc/cgraph.h:3355:1,
 inlined from ‘symtab_node* 
symtab_node::ultimate_alias_target(availability*, symtab_node*)’ at 
/opt/notnfs/polacek/gcc/gcc/cgraph.h:3199:35,
 inlined from ‘symtab_node* 
symtab_node::ultimate_alias_target(availability*, symtab_node*)’ at 
/opt/notnfs/polacek/gcc/gcc/cgraph.h:3193:1,
 inlined from ‘varpool_node* 
varpool_node::ultimate_alias_target(availability*, symtab_node*)’ at 
/opt/notnfs/polacek/gcc/gcc/cgraph.h:3234:5,
 inlined from ‘availability 
varpool_node::_ZN12varpool_node16get_availabilityEP11symtab_node.part.0(symtab_node*)’
 at /opt/notnfs/polacek/gcc/gcc/varpool.c:501:29:
/opt/notnfs/polacek/gcc/gcc/varpool.c:490:19: error: array subscript 
‘varpool_node[0]’ is partly outside array bounds of ‘varpool_node [0]’ 
[-Werror=array-bounds]
   490 |   if (!definition && !in_other_partition)
   |   ^~
In file included from /opt/notnfs/polacek/gcc/gcc/varpool.c:29:
/opt/notnfs/polacek/gcc/gcc/cgraph.h: In member function ‘availability 
varpool_node::_ZN12varpool_node16get_availabilityEP11symtab_node.part.0(symtab_node*)’:
/opt/notnfs/polacek/gcc/gcc/cgraph.h:1969:39: note: object 
‘varpool_node::’ of size 120
  1969 | struct GTY((tag ("SYMTAB_VARIABLE"))) varpool_node : public symtab_node
   |   ^~~~
cc1plus: all warnings being treated as errors


I bootstrapped & regtested it on top of r12-2131 just before pushing
it but let me try with the top of trunk (r12-2135 as of now).

[a bit later]

The bootstrap succeeded with the same configuration settings:

  --enable-languages=ada,c,c++,d,fortran,jit,lto,objc,obj-c++ 
--enable-checking=yes --enable-host-shared --enable-valgrind-annotations


But with --enable-checking=release I was able to reproduce the error
above.  Since there is a simple way to bootstrap I'm not going to
revert the patch tonight.  I'll look into the problem tomorrow and
see if it can be easily fixed.  If not, I'll revert it then.

Martin


Re: [PATCH 1/2] CALL_INSN may not be a real function call.

2021-07-07 Thread Hongtao Liu via Gcc-patches
On Thu, Jul 8, 2021 at 7:44 AM Segher Boessenkool
 wrote:
>
> On Wed, Jul 07, 2021 at 11:23:48PM +0800, Hongtao Liu wrote:
> > On Wed, Jul 7, 2021 at 10:54 PM Segher Boessenkool
> >  wrote:
>
> [ snip some old stuff ]
>
> > > Yeah.  This stuff needs a rethink.
> > >
> > > What is wrong with just using an unspec and clobbers?
> > >
> > It's partial and **potential clobber**,
>
> All RTL "clobber" is always a potential clobber, it never guarantees the
> existing value does not survive.  You can pass it through some unspec to
> make this more explicit.  You will have to add some hook that CSE can
TARGET_INSN_CALLEE_ABI is the hook designed for this.


-- 
BR,
Hongtao


*Ping* [Patch] Fortran: Fix bind(C) character length checks

2021-07-07 Thread Burnus, Tobias
*Ping*

I intend to incorporate Sandra's suggestions, except for the beginning of line 
spacing - that's needed to avoid exceeding the 80 character line limit. I did 
not include an updated patch as just pinging is easier on a mobile during 
vacation :-)

Thanks,

Tobias

Loosemore, Sandra wrote:

On 7/1/21 11:08 AM, Tobias Burnus wrote:
> Hi all,
>
> this patch came up when discussing Sandra's TS29113 patch internally.
> There is presumably also some overlap with José's patches.
>
> This patch tries to rectify the BIND(C) CHARACTER handling on the
> diagnostic side, only. That is: what to accept and what
> to reject for which Fortran standard.
>
>
> The rules are:
>
> * [F2003-F2018] Interoperable is character(len=1)
>→ F2018, 18.3.1  Interoperability of intrinsic types
>(General, unchanged)
>
> * Fortran 2008: In some cases, const-length chars are
>permitted as well:
>→ F2018, 18.3.4  Interoperability of scalar variables
>→ F2018, 18.3.5  Interoperability of array variables
>→ F2018, 18.3.6  Interoperability of procedures and procedure interfaces
>   [= F2008, 15.3.{4,5,6}]
> For global vars with bind(C), 18.3.4 + 18.3.5 applies directly (TODO:
> Add support, not in this patch)
> For passed-by ref dummy arguments, 18.3.4 + 18.3.5 are referenced in
> - F2008: R1229  proc-language-binding-spec is language-binding-spec
>   C1255 (R1229) 
> - F2018: C1554
>
> While it is not very clearly spelt out, I regard 'char parm[4]'
> interoperable with 'character(len=4) :: a', 'character(len=2) :: b(2)'
> and 'character(len=1) :: c(4)' for both global variables and for
> dummy arguments.
>
> * Fortran 2018/TS29113:  Uses additionally CFI array descriptor
>- allocatable, pointer:  must be len=:
>- nonallocatable/nonpointer: len=* → implies array descriptor also
>  for assumed-size/explicit-size/scalar arguments.
>- All which are passed by an array descriptor already are without further
>  restrictions: assumed-shape, assumed-rank, i.e. len=<const> seems
>  to be also fine
> → 18.3.6 under item (5) bullet point 2 and 3 plus (6).
>
>
> I hope I got the conditions right. I also fixed an issue with
> character(len=5) :: str – the code in trans-expr.c did crash for
> scalars  (decl.c did not check any constraints for arrays).
> I believe the condition is wrong and for len=<const> no descriptor
> is used.
>
> Any comments, remarks?

I gave this patch a try on my TS 29113 last night.  Changing the error
messages kind of screwed up my list of FAILs, but I did see that it also
caught some invalid character arguments in
interoperability/typecodes-scalar.f90 and
interoperability/typecodes-scalar-ext.f90 (which are already broken by 2
other major gfortran bugs I still need to file PRs for).  :-S

I haven't tried to review the patch WRT correctness with the
requirements of the standard yet, but I have a few nits about error
messages:

> +   /* F2018, 18.3.6 (6).  */
> +   if (!sym->ts.deferred)
> + {
> +   gfc_error ("Allocatable and pointer character dummy "
> +  "argument %qs at %L must have deferred length "
> +  "as procedure %qs is BIND(C)", sym->name,
> +  &sym->declared_at, sym->ns->proc_name->name);
> +   retval = false;
> + }

This is the error the two aforementioned test cases started giving, but
the message is confusing and doesn't read well (it was a pointer dummy, not
"allocatable and pointer").  Maybe just s/and/or/, or customize the
message depending on which one it is?

> +   gfc_error ("Character dummy argument %qs at %L must be "
> +  "of constant length or assumed length, "
> +  "unless it has assumed-shape or assumed-rank, "
> +  "as procedure %qs has the BIND(C) attribute",
> +  sym->name, &sym->declared_at,
> +  sym->ns->proc_name->name);

I don't think either "assumed-shape" or "assumed-rank" should be
hyphenated in this context unless that exact hyphenation is a term of
art in the Fortran standard or other technical documentation.  In normal
English, adjective phrases are usually only hyphenated when they appear
immediately before the noun they modify; "assumed-shape array", but "an
array with assumed shape".

> +   else if (!gfc_notify_std (GFC_STD_F2018,
> + "Character dummy argument %qs at %L"
> + " with nonconstant length as "
> + "procedure %qs is BIND(C)",
> + sym->name, &sym->declared_at,
> + sym->ns->proc_name->name))
> + retval = false;
> + }

Elsewhere the convention seems to be to format strings sp