date:20211011

Re: [PATCH] regcprop: Determine subreg offset depending on endianness [PR101260]

2021-10-11 Thread Richard Biener via Gcc-patches

On Fri, Oct 8, 2021 at 1:31 PM Stefan Schulze Frielinghaus via
Gcc-patches  wrote:
>
> gcc/ChangeLog:
>
> * regcprop.c (maybe_mode_change): Determine offset relative to
> high or low part depending on endianness.
>
> Bootstrapped and regtested on IBM Z. Ok for mainline and gcc-{11,10,9}?

Is there a testcase to add?

> ---
>  gcc/regcprop.c | 11 ++++++++---
>  1 file changed, 8 insertions(+), 3 deletions(-)
>
> diff --git a/gcc/regcprop.c b/gcc/regcprop.c
> index d2a01130fe1..0e1ac12458a 100644
> --- a/gcc/regcprop.c
> +++ b/gcc/regcprop.c
> @@ -414,9 +414,14 @@ maybe_mode_change (machine_mode orig_mode, machine_mode 
> copy_mode,
> copy_nregs, &bytes_per_reg))
> return NULL_RTX;
>poly_uint64 copy_offset = bytes_per_reg * (copy_nregs - use_nregs);
> -  poly_uint64 offset
> -   = subreg_size_lowpart_offset (GET_MODE_SIZE (new_mode) + copy_offset,
> - GET_MODE_SIZE (orig_mode));
> +  poly_uint64 offset =
> +#if WORDS_BIG_ENDIAN
> +   subreg_size_highpart_offset
> +#else
> +   subreg_size_lowpart_offset
> +#endif
> +   (GET_MODE_SIZE (new_mode) + copy_offset,
> +GET_MODE_SIZE (orig_mode));
>regno += subreg_regno_offset (regno, orig_mode, offset, new_mode);
>if (targetm.hard_regno_mode_ok (regno, new_mode))
> return gen_raw_REG (new_mode, regno);
> --
> 2.31.1
>

Re: [PATCH] options: use cl_optimization_hash.

2021-10-11 Thread Martin Liška


On 10/8/21 12:23, Martin Liška wrote:

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.


I've spoken to Honza and he approves the patch.
I'm going to install it.

Martin

Re: [PATCH][i386] Support reduc_{plus, smax, smin, umax, umin}_scal_v4qi.

2021-10-11 Thread Uros Bizjak via Gcc-patches

On Mon, Oct 11, 2021 at 8:26 AM liuhongt  wrote:
>
>   After providing expanders for reduc_umin/umax/smin/smax_scal_v4qi,
> performance is a little bit faster than before for reduce operations
> w/ options -O2 -march=haswell, -O2 -march=skylake-avx512
> and -Ofast -march=skylake-avx512.
>
> gcc/ChangeLog
>
> PR target/102483
> * config/i386/i386-expand.c (emit_reduc_half): Handle
> V4QImode.
> * config/i386/mmx.md (reduc_<code>_scal_v4qi): New expander.
> (reduc_plus_scal_v4qi): Ditto.
>
> gcc/testsuite/ChangeLog
>
> * gcc.target/i386/pr102483.c: New test.
> * gcc.target/i386/pr102483-2.c: New test.

LGTM.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386-expand.c  |  5 ++
>  gcc/config/i386/mmx.md | 45 +
>  gcc/testsuite/gcc.target/i386/pr102483-2.c | 26 ++
>  gcc/testsuite/gcc.target/i386/pr102483.c   | 58 ++
>  4 files changed, 134 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102483-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr102483.c
>
> diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
> index 3e6f7d8ef7e..4bade9e 100644
> --- a/gcc/config/i386/i386-expand.c
> +++ b/gcc/config/i386/i386-expand.c
> @@ -16043,6 +16043,11 @@ emit_reduc_half (rtx dest, rtx src, int i)
>  case E_V2DFmode:
>tem = gen_vec_interleave_highv2df (dest, src, src);
>break;
> +case E_V4QImode:
> +  d = gen_reg_rtx (V1SImode);
> +  tem = gen_mmx_lshrv1si3 (d, gen_lowpart (V1SImode, src),
> +  GEN_INT (i / 2));
> +  break;
>  case E_V4HImode:
>d = gen_reg_rtx (V1DImode);
>tem = gen_mmx_lshrv1di3 (d, gen_lowpart (V1DImode, src),
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index 106d41c8fd9..6c5cbcfa52c 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -3989,6 +3989,18 @@ (define_expand "reduc__scal_v4hi"
>DONE;
>  })
>
> +(define_expand "reduc_<code>_scal_v4qi"
> +  [(smaxmin:V4QI
> + (match_operand:QI 0 "register_operand")
> + (match_operand:V4QI 1 "register_operand"))]
> +  "TARGET_SSE4_1"
> +{
> +  rtx tmp = gen_reg_rtx (V4QImode);
> +  ix86_expand_reduc (gen_<code>v4qi3, tmp, operands[1]);
> +  emit_insn (gen_vec_extractv4qiqi (operands[0], tmp, const0_rtx));
> +  DONE;
> +})
> +
>  (define_expand "reduc_<code>_scal_v4hi"
>[(umaxmin:V4HI
>   (match_operand:HI 0 "register_operand")
> @@ -4001,6 +4013,39 @@ (define_expand "reduc__scal_v4hi"
>DONE;
>  })
>
> +(define_expand "reduc_<code>_scal_v4qi"
> +  [(umaxmin:V4QI
> + (match_operand:QI 0 "register_operand")
> + (match_operand:V4QI 1 "register_operand"))]
> +  "TARGET_SSE4_1"
> +{
> +  rtx tmp = gen_reg_rtx (V4QImode);
> +  ix86_expand_reduc (gen_<code>v4qi3, tmp, operands[1]);
> +  emit_insn (gen_vec_extractv4qiqi (operands[0], tmp, const0_rtx));
> +  DONE;
> +})
> +
> +(define_expand "reduc_plus_scal_v4qi"
> + [(plus:V4QI
> +(match_operand:QI 0 "register_operand")
> +(match_operand:V4QI 1 "register_operand"))]
> + "TARGET_SSE2"
> +{
> +  rtx op1 = gen_reg_rtx (V16QImode);
> +  emit_insn (gen_vec_setv4si_0 (lowpart_subreg (V4SImode, op1, V16QImode),
> +   CONST0_RTX (V4SImode),
> +   lowpart_subreg (SImode,
> +   operands[1],
> +   V4QImode)));
> +  rtx tmp = gen_reg_rtx (V16QImode);
> +  emit_move_insn (tmp, CONST0_RTX (V16QImode));
> +  rtx tmp2 = gen_reg_rtx (V2DImode);
> +  emit_insn (gen_sse2_psadbw (tmp2, op1, tmp));
> +  tmp2 = gen_lowpart (V16QImode, tmp2);
> +  emit_insn (gen_vec_extractv16qiqi (operands[0], tmp2, const0_rtx));
> +  DONE;
> +})
> +
>  (define_expand "usadv8qi"
>[(match_operand:V2SI 0 "register_operand")
> (match_operand:V8QI 1 "register_operand")
> diff --git a/gcc/testsuite/gcc.target/i386/pr102483-2.c 
> b/gcc/testsuite/gcc.target/i386/pr102483-2.c
> new file mode 100644
> index 000..d477c53db08
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102483-2.c
> @@ -0,0 +1,26 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target sse4 } */
> +/* { dg-options "-O2 -msse4.1" } */
> +
> +#include "sse4_1-check.h"
> +
> +#include "pr102483.c"
> +
> +static void
> +sse4_1_test ()
> +{
> +  char p[4] = { -103, 23, 41, -56 };
> +  unsigned char up[4] = { 100, 30, 255, 9 };
> +
> +  char res = reduce_add (p);
> +  if (res != -95)
> +abort ();
> +  if (reduce_smin (p) != -103)
> +abort ();
> +  if (reduce_smax (p) != 41)
> +abort ();
> +  if (reduce_umin (up) != 9)
> +abort ();
> +  if (reduce_umax (up) != 255)
> +abort();
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr102483.c 
> b/gcc/testsuite/gcc.target/i386/pr102483.c
> new file mode 100644
> index 000..681b57598ef
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr102483.c
> @@ -0,0

[PATCH] x86_64: Some SUBREG related optimization tweaks to i386 backend.

2021-10-11 Thread Roger Sayle


This patch contains two SUBREG-related optimization enabling tweaks to
the x86 backend.

The first change, to ix86_expand_vector_extract, cures the strange
-march=cascadelake related non-determinism that affected my new test
cases last week.  Extracting a QImode or HImode element from an SSE
vector performs a zero-extension to SImode, which is currently
represented as:

(set (subreg:SI (reg:QI target)) (zero_extend:SI (...))

Unfortunately, the semantics of this RTL doesn't quite match what was
intended.  A set of a paradoxical subreg allows the high-bits to take
an arbitrary value (hence the non-determinism).  A more correct
representation should be:

(set (reg:SI temp) (zero_extend:SI (...))
(set (reg:QI target) (subreg:QI (reg:SI temp))

Optionally with the SUBREG rtx annotated as SUBREG_PROMOTED_VAR_P to
indicate that value is already zero-extended in the SUBREG_REG.

The second change is very similar, which is why I've included it in
this patch, where currently the early RTL optimizers can produce:

(set (reg:V?? hardreg) (subreg ...))

where this instruction may require a spill/reload from memory when
the modes aren't tieable.  Alas the presence of the hard register
prevents combine/gcse etc. optimizing this away, or reusing the result
which would increase the lifetime of the hard register before reload.

The solution is to treat vector hard registers the same way as the
x86 backend handles scalar hard registers, and only allow sets from
pseudos before register allocation, which is achieved by checking
ix86_hardreg_mov_ok.  Hence the above instruction is expanded and
maintained as:

(set (reg:V?? pseudo) (subreg ...))
(set (reg:V?? hardreg) (reg:V?? pseudo))

which allows the RTL optimizers freedom to optimize the SUBREG.


This patch has been tested on x86_64-pc-linux-gnu with "make bootstrap"
and "make -k check" with no new failures.  In theory, my recent "obvious"
regexp fix to accommodate -march=cascadelake is no longer required, but
there's no harm leaving the testsuite as it is.

Ok for mainline?


2021-10-11  Roger Sayle  

gcc/ChangeLog
* config/i386/i386-expand.c (ix86_expand_vector_move):  Use a
pseudo intermediate when moving a SUBREG into a hard register,
by checking ix86_hardreg_mov_ok.
(ix86_expand_vector_extract): Store zero-extended SImode
intermediate in a pseudo, then set target using a SUBREG_PROMOTED
annotated subreg.
* config/i386/sse.md (mov<mode>_internal): Prevent CSE creating
complex (SUBREG) sets of (vector) hard registers before reload, by
checking ix86_hardreg_mov_ok.


Thanks in advance,
Roger
--

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 4780b99..44404bd 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -617,8 +617,9 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
 
   /* Make operand1 a register if it isn't already.  */
   if (can_create_pseudo_p ()
-  && !register_operand (op0, mode)
-  && !register_operand (op1, mode))
+  && (!ix86_hardreg_mov_ok (op0, op1)
+ || (!register_operand (op0, mode)
+ && !register_operand (op1, mode))))
 {
   rtx tmp = ix86_gen_scratch_sse_rtx (GET_MODE (op0));
   emit_move_insn (tmp, op1);
@@ -16005,11 +16006,15 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, 
rtx vec, int elt)
   /* Let the rtl optimizers know about the zero extension performed.  */
   if (inner_mode == QImode || inner_mode == HImode)
{
+ rtx reg = gen_reg_rtx (SImode);
  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
- target = gen_lowpart (SImode, target);
+ emit_move_insn (reg, tmp);
+ tmp = gen_lowpart (inner_mode, reg);
+ SUBREG_PROMOTED_VAR_P (tmp) = 1;
+ SUBREG_PROMOTED_SET (tmp, 1);
}
 
-  emit_insn (gen_rtx_SET (target, tmp));
+  emit_move_insn (target, tmp);
 }
   else
 {
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 4559b0c..e43f597 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1270,7 +1270,8 @@
 " C,,vm,v"))]
   "TARGET_SSE
&& (register_operand (operands[0], mode)
-   || register_operand (operands[1], mode))"
+   || register_operand (operands[1], mode))
+   && ix86_hardreg_mov_ok (operands[0], operands[1])"
 {
   switch (get_attr_type (insn))
 {

Re: [PATCH] Adjust more testcases for O2 vectorization enabling.

2021-10-11 Thread Jakub Jelinek via Gcc-patches

On Sat, Oct 09, 2021 at 12:55:15PM +0800, liuhongt via Gcc-patches wrote:
> Pushed to trunk.
> 
> libgomp/ChangeLog:
> 
>   * testsuite/libgomp.c++/scan-10.C: Add option -fvect-cost-model=cheap.
>   * testsuite/libgomp.c++/scan-11.C: Ditto.
>   * testsuite/libgomp.c++/scan-12.C: Ditto.
>   * testsuite/libgomp.c++/scan-13.C: Ditto.
>   * testsuite/libgomp.c++/scan-14.C: Ditto.
>   * testsuite/libgomp.c++/scan-15.C: Ditto.
>   * testsuite/libgomp.c++/scan-16.C: Ditto.
>   * testsuite/libgomp.c++/scan-9.C: Ditto.
>   * testsuite/libgomp.c-c++-common/lastprivate-conditional-7.c: Ditto.
>   * testsuite/libgomp.c-c++-common/lastprivate-conditional-8.c: Ditto.
>   * testsuite/libgomp.c/scan-11.c: Ditto.
>   * testsuite/libgomp.c/scan-12.c: Ditto.
>   * testsuite/libgomp.c/scan-13.c: Ditto.
>   * testsuite/libgomp.c/scan-14.c: Ditto.
>   * testsuite/libgomp.c/scan-15.c: Ditto.
>   * testsuite/libgomp.c/scan-16.c: Ditto.
>   * testsuite/libgomp.c/scan-17.c: Ditto.
>   * testsuite/libgomp.c/scan-18.c: Ditto.
>   * testsuite/libgomp.c/scan-19.c: Ditto.
>   * testsuite/libgomp.c/scan-20.c: Ditto.
>   * testsuite/libgomp.c/scan-21.c: Ditto.
>   * testsuite/libgomp.c/scan-22.c: Ditto.

I don't think this is the right thing to do.
This just means that at some point between 2013, when -fsimd-cost-model was
introduced, and now, the -fsimd-cost-model= option at least partially stopped
working properly.
As documented, -fsimd-cost-model= overrides the -fvect-cost-model= setting
for OpenMP simd loops (loop->force_vectorize is true) if specified differently
from default.
In tree-vectorizer.h we have:
static inline bool
unlimited_cost_model (loop_p loop)
{
  if (loop != NULL && loop->force_vectorize
  && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT)
return flag_simd_cost_model == VECT_COST_MODEL_UNLIMITED;
  return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED);
}
and use it in various places, but we also just use flag_vect_cost_model
in lots of places (and in one spot use flag_simd_cost_model, not sure if
we are sure it is a force_vectorize loop or what).

So, IMHO we should change the above inline function to
loop_cost_model and let it return the cost model and then just
reimplement unlimited_cost_model as
return loop_cost_model (loop) == VECT_COST_MODEL_UNLIMITED;
and then adjust the direct uses of the flag and revert these changes.

Jakub

[Patch] libgomp: Add tests for omp_atv_serialized and deprecate omp_atv_sequential.

2021-10-11 Thread Marcel Vollweiler


Hi,

The variable omp_atv_sequential was replaced by omp_atv_serialized in
OpenMP 5.1. This was already implemented by Jakub (C/C++, commit
ea82325afec) and Tobias (Fortran, commit fff15bad1ab).

This patch adds two tests to check if omp_atv_serialized is available
(one test for C/C++ and one for Fortran). Besides that
omp_atv_sequential is marked as deprecated in C/C++ and Fortran for
OpenMP 5.1.

The patch was tested on x86_64-linux and powerpc64le-linux with nvptx
offloading and on x86_64-linux with amdgcn offloading with no regressions.

Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
libgomp: Add tests for omp_atv_serialized and deprecate omp_atv_sequential.

The variable omp_atv_sequential was replaced by omp_atv_serialized in OpenMP
5.1. This was already implemented by Jakub (C/C++, commit ea82325afec) and
Tobias (Fortran, commit fff15bad1ab).

This patch adds two tests to check if omp_atv_serialized is available (one test
for C/C++ and one for Fortran). Besides that omp_atv_sequential is marked as
deprecated in C/C++ and Fortran for OpenMP 5.1.

libgomp/ChangeLog:

* allocator.c (omp_init_allocator): Replace omp_atv_sequential with
omp_atv_serialized.
* omp.h.in: Add deprecated flag for omp_atv_sequential.
* omp_lib.f90.in: Add deprecated flag for omp_atv_sequential.
* testsuite/libgomp.c-c++-common/alloc-10.c: New test.
* testsuite/libgomp.fortran/alloc-12.f90: New test.

diff --git a/libgomp/allocator.c b/libgomp/allocator.c
index dce600f..deebb6a 100644
--- a/libgomp/allocator.c
+++ b/libgomp/allocator.c
@@ -82,7 +82,7 @@ omp_init_allocator (omp_memspace_handle_t memspace, int 
ntraits,
break;
  case omp_atv_contended:
  case omp_atv_uncontended:
- case omp_atv_sequential:
+ case omp_atv_serialized:
  case omp_atv_private:
data.sync_hint = traits[i].value;
break;
diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in
index d75ee13..e57e192 100644
--- a/libgomp/omp.h.in
+++ b/libgomp/omp.h.in
@@ -157,7 +157,7 @@ typedef enum omp_alloctrait_value_t
   omp_atv_contended = 3,
   omp_atv_uncontended = 4,
   omp_atv_serialized = 5,
-  omp_atv_sequential = omp_atv_serialized,
+  omp_atv_sequential __GOMP_DEPRECATED_5_1 = omp_atv_serialized,
   omp_atv_private = 6,
   omp_atv_all = 7,
   omp_atv_thread = 8,
diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in
index 1063eee..57766b5 100644
--- a/libgomp/omp_lib.f90.in
+++ b/libgomp/omp_lib.f90.in
@@ -810,7 +810,7 @@
 #endif
 
 #if _OPENMP >= 202011
-!GCC$ ATTRIBUTES DEPRECATED :: omp_proc_bind_master
+!GCC$ ATTRIBUTES DEPRECATED :: omp_proc_bind_master, omp_atv_sequential
 #endif
 
   end module omp_lib
diff --git a/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c 
b/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c
new file mode 100644
index 000..742c64a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c
@@ -0,0 +1,25 @@
+#include <omp.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+const omp_alloctrait_t traits[]
+= { { omp_atk_alignment, 64 },
+{ omp_atk_sync_hint, omp_atv_serialized },
+{ omp_atk_fallback, omp_atv_null_fb } };
+
+int
+main ()
+{
+  omp_allocator_handle_t a;
+  int *volatile p;
+  a = omp_init_allocator (omp_default_mem_space, 3, traits);
+  if (a == omp_null_allocator)
+abort ();
+  p = (int *) omp_alloc (3072, a);
+  if ((((uintptr_t) p) % 64) != 0)
+abort ();
+  p[0] = 1;
+  p[3071 / sizeof (int)] = 2;
+  omp_free (p, a);
+  omp_destroy_allocator (a);
+}
\ No newline at end of file
diff --git a/libgomp/testsuite/libgomp.fortran/alloc-12.f90 
b/libgomp/testsuite/libgomp.fortran/alloc-12.f90
new file mode 100644
index 000..3d10959
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/alloc-12.f90
@@ -0,0 +1,28 @@
+! { dg-additional-options "-Wall -Wextra" }
+program main
+  use omp_lib
+  use ISO_C_Binding
+  implicit none (external, type)
+  type(c_ptr) :: p
+  integer, pointer, contiguous :: ip(:)
+  type (omp_alloctrait) :: traits(3)
+  integer (omp_allocator_handle_kind) :: a
+  integer (c_ptrdiff_t) :: iptr
+
+  traits = [omp_alloctrait (omp_atk_alignment, 64), &
+omp_alloctrait (omp_atk_fallback, omp_atv_null_fb), &
+omp_alloctrait (omp_atk_sync_hint, omp_atv_serialized)]
+  a = omp_init_allocator (omp_default_mem_space, 3, traits)
+  if (a == omp_null_allocator) stop 1
+
+  p = omp_alloc (3 * c_sizeof (0), a)
+  if (.not. c_associated (p)) stop 2
+  call c_f_pointer (p, ip, [3])
+  if (mod (TRANSFER (p, iptr), 64) /= 0) &
+stop 3
+  ip(1) = 1
+  ip(2) = 2
+  ip(3) = 3
+  call omp_free (p, a)
+  call omp_destroy_allocator (a)
+end program main

Re: [Patch] libgomp: Add tests for omp_atv_serialized and deprecate omp_atv_sequential.

2021-10-11 Thread Jakub Jelinek via Gcc-patches

On Mon, Oct 11, 2021 at 11:40:54AM +0200, Marcel Vollweiler wrote:
> libgomp: Add tests for omp_atv_serialized and deprecate omp_atv_sequential.
> 
> The variable omp_atv_sequential was replaced by omp_atv_serialized in OpenMP
> 5.1. This was already implemented by Jakub (C/C++, commit ea82325afec) and
> Tobias (Fortran, commit fff15bad1ab).
> 
> This patch adds two tests to check if omp_atv_serialized is available (one 
> test
> for C/C++ and one for Fortran). Besides that omp_atv_sequential is marked as
> deprecated in C/C++ and Fortran for OpenMP 5.1.
> 
> libgomp/ChangeLog:
> 
>   * allocator.c (omp_init_allocator): Replace omp_atv_sequential with
>   omp_atv_serialized.
>   * omp.h.in: Add deprecated flag for omp_atv_sequential.
>   * omp_lib.f90.in: Add deprecated flag for omp_atv_sequential.
>   * testsuite/libgomp.c-c++-common/alloc-10.c: New test.
>   * testsuite/libgomp.fortran/alloc-12.f90: New test.

LGTM, except one nit.

> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c
> +}
> \ No newline at end of file

Please make sure the file ends with a newline before committing.

Jakub

[committed] openmp: Add omp_set_num_teams, omp_get_max_teams, omp_[gs]et_teams_thread_limit

2021-10-11 Thread Jakub Jelinek via Gcc-patches

Hi!

OpenMP 5.1 adds env vars and functions to set and query new ICVs used
as fallback if thread_limit or num_teams clauses aren't specified on
teams construct.

The following patch implements those, though further work will be needed:
1) OpenMP 5.1 also changed the num_teams clause, so that it can specify
   both lower and upper limit for how many teams should be created and
   changed the meaning when only one expression is provided, instead of
   num_teams(expr) in 5.0 meaning num_teams(1:expr) in 5.1, it now means
   num_teams(expr:expr), i.e. while previously we could create 1 to expr
   teams, in 5.1 we have some low limit by default equal to the single
   expression provided and may not create fewer teams.
   For host teams (which we don't currently implement efficiently for
   NUMA hosts) we trivially satisfy it now by always honoring what the
   user asked for, but for the offloading teams I think we'll need to
   rethink the APIs; currently teams construct is just a call that returns
   and possibly lowers the number of teams; and whenever possible we try
   to evaluate num_teams/thread_limit already on the target construct
   and the GOMP_teams call just sets the number of teams to the minimum
   of provided and requested teams; for some cases e.g. where target
   is not combined with teams and num_teams expression calls some functions
   etc., we need to call those functions in the target region and so it is
   late to figure number of teams, but also hw could just limit what it
   is willing to create; in that case I'm afraid we need to run the target
   body multiple times and arrange for omp_get_team_num () returning the
   right values
2) we need to finally implement the NUMA handling for GOMP_teams_reg
3) I now realize I haven't added some testcase coverage, will do that
   incrementally
4) libgomp.texi needs updates for these new APIs, but also others like
   the allocator

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2021-10-11  Jakub Jelinek  

gcc/
* omp-low.c (omp_runtime_api_call): Handle omp_get_max_teams,
omp_[sg]et_teams_thread_limit and omp_set_num_teams.
libgomp/
* omp.h.in (omp_set_num_teams, omp_get_max_teams,
omp_set_teams_thread_limit, omp_get_teams_thread_limit): Declare.
* omp_lib.f90.in (omp_set_num_teams, omp_get_max_teams,
omp_set_teams_thread_limit, omp_get_teams_thread_limit): Declare.
* omp_lib.h.in (omp_set_num_teams, omp_get_max_teams,
omp_set_teams_thread_limit, omp_get_teams_thread_limit): Declare.
* libgomp.h (gomp_nteams_var, gomp_teams_thread_limit_var): Declare.
* libgomp.map (OMP_5.1): Export omp_get_max_teams{,_},
omp_get_teams_thread_limit{,_}, omp_set_num_teams{,_,_8_} and
omp_set_teams_thread_limit{,_,_8_}.
* icv.c (omp_set_num_teams, omp_get_max_teams,
omp_set_teams_thread_limit, omp_get_teams_thread_limit): New
functions.
* env.c (gomp_nteams_var, gomp_teams_thread_limit_var): Define.
(omp_display_env): Print OMP_NUM_TEAMS and OMP_TEAMS_THREAD_LIMIT.
(initialize_env): Handle OMP_NUM_TEAMS and OMP_TEAMS_THREAD_LIMIT env
vars.
* teams.c (GOMP_teams_reg): If thread_limit is not specified, use
gomp_teams_thread_limit_var as fallback if not zero.  If num_teams
is not specified, use gomp_nteams_var.
* fortran.c (omp_set_num_teams, omp_get_max_teams,
omp_set_teams_thread_limit, omp_get_teams_thread_limit): Add
ialias_redirect.
(omp_set_num_teams_, omp_set_num_teams_8_, omp_get_max_teams_,
omp_set_teams_thread_limit_, omp_set_teams_thread_limit_8_,
omp_get_teams_thread_limit_): New functions.

--- gcc/omp-low.c.jj	2021-09-30 17:12:15.236586906 +0200
+++ gcc/omp-low.c   2021-10-09 14:34:21.119388958 +0200
@@ -3953,6 +3953,7 @@ omp_runtime_api_call (const_tree fndecl)
   "get_level",
   "get_max_active_levels",
   "get_max_task_priority",
+  "get_max_teams",
   "get_max_threads",
   "get_nested",
   "get_num_devices",
@@ -3965,6 +3966,7 @@ omp_runtime_api_call (const_tree fndecl)
   "get_proc_bind",
   "get_supported_active_levels",
   "get_team_num",
+  "get_teams_thread_limit",
   "get_thread_limit",
   "get_thread_num",
   "get_wtick",
@@ -3998,8 +4000,10 @@ omp_runtime_api_call (const_tree fndecl)
   "set_dynamic",
   "set_max_active_levels",
   "set_nested",
+  "set_num_teams",
   "set_num_threads",
-  "set_schedule"
+  "set_schedule",
+  "set_teams_thread_limit"
 };
 
   int mode = 0;
--- libgomp/omp.h.in.jj 2021-10-01 10:32:03.024954096 +0200
+++ libgomp/omp.h.in	2021-10-09 15:06:38.173661594 +0200
@@ -261,6 +261,11 @@ extern int omp_get_max_task_priority (vo
 
 extern void omp_fulfill_event (omp_event_handle_t) __GOMP_NOTHROW;
 
+extern void omp_set_num_teams (int) __GOMP_NOTHROW;
+extern int omp_get_max

[PATCH v3] MIPS: R6: load/store can process unaligned address

2021-10-11 Thread YunQiang Su

MIPS Release 6 requires that lw/ld/sw/sd work with
unaligned addresses; this may be implemented either
fully in hardware or by trap&emulate.

Since it doesn't have to be fully done by hardware, we add a
pair of options -m(no-)unaligned-access. Kernels may need them.

gcc/ChangeLog:

* config/mips/mips.h (ISA_HAS_UNALIGNED_ACCESS): New macro.
(STRICT_ALIGNMENT): R6 can do unaligned access.
* config/mips/mips.md (movmisalign): Likewise.
* config/mips/mips.opt: add -m(no-)unaligned-access
* doc/invoke.texi: Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/mips/mips.exp: add unaligned-access
* gcc.target/mips/unaligned-2.c: New test.
* gcc.target/mips/unaligned-3.c: New test.
---
 gcc/config/mips/mips.h  |  6 ++-
 gcc/config/mips/mips.md | 10 
 gcc/config/mips/mips.opt|  4 ++
 gcc/doc/invoke.texi | 10 
 gcc/testsuite/gcc.target/mips/mips.exp  |  1 +
 gcc/testsuite/gcc.target/mips/unaligned-2.c | 53 +
 gcc/testsuite/gcc.target/mips/unaligned-3.c | 53 +
 7 files changed, 136 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/mips/unaligned-2.c
 create mode 100644 gcc/testsuite/gcc.target/mips/unaligned-3.c

diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index 973372e78..34490bfc2 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -243,6 +243,10 @@ struct mips_cpu_info {
 && (mips_isa_rev >= 6 \
 || ISA_HAS_MSA))
 
+/* ISA load/store instructions can handle unaligned address */
+#define ISA_HAS_UNALIGNED_ACCESS (TARGET_UNALIGNED_ACCESS \
+&& (mips_isa_rev >= 6))
+
 /* The ISA compression flags that are currently in effect.  */
 #define TARGET_COMPRESSION (target_flags & (MASK_MIPS16 | MASK_MICROMIPS))
 
@@ -1684,7 +1688,7 @@ FP_ASM_SPEC "\
   (ISA_HAS_MSA ? BITS_PER_MSA_REG : LONG_DOUBLE_TYPE_SIZE)
 
 /* All accesses must be aligned.  */
-#define STRICT_ALIGNMENT 1
+#define STRICT_ALIGNMENT (!ISA_HAS_UNALIGNED_ACCESS)
 
 /* Define this if you wish to imitate the way many other C compilers
handle alignment of bitfields and the structures that contain
diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index 455b9b802..e35d57d9e 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -4459,6 +4459,16 @@ (define_insn "mov_r"
   [(set_attr "move_type" "store")
(set_attr "mode" "")])
 
+;; Unaligned direct access
+(define_expand "movmisalign<mode>"
+  [(set (match_operand:JOIN_MODE 0)
+   (match_operand:JOIN_MODE 1))]
+  "ISA_HAS_UNALIGNED_ACCESS"
+{
+  if (mips_legitimize_move (<MODE>mode, operands[0], operands[1]))
+DONE;
+})
+
 ;; An instruction to calculate the high part of a 64-bit SYMBOL_ABSOLUTE.
 ;; The required value is:
 ;;
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
index 6af8037e9..ebb4c6164 100644
--- a/gcc/config/mips/mips.opt
+++ b/gcc/config/mips/mips.opt
@@ -404,6 +404,10 @@ mtune=
 Target RejectNegative Joined Var(mips_tune_option) ToLower 
Enum(mips_arch_opt_value)
 -mtune=PROCESSOR   Optimize the output for PROCESSOR.
 
+munaligned-access
+Target Var(TARGET_UNALIGNED_ACCESS) Init(1)
+Generate code with unaligned load store, valid for MIPS R6.
+
 muninit-const-in-rodata
 Target Var(TARGET_UNINIT_CONST_IN_RODATA)
 Put uninitialized constants in ROM (needs -membedded-data).
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 8b3ebcfbc..ffb1f4676 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1069,6 +1069,7 @@ Objective-C and Objective-C++ Dialects}.
 -mcheck-zero-division  -mno-check-zero-division @gol
 -mdivide-traps  -mdivide-breaks @gol
 -mload-store-pairs  -mno-load-store-pairs @gol
+-munaligned-access  -mno-unaligned-access @gol
 -mmemcpy  -mno-memcpy  -mlong-calls  -mno-long-calls @gol
 -mmad  -mno-mad  -mimadd  -mno-imadd  -mfused-madd  -mno-fused-madd  -nocpp 
@gol
 -mfix-24k  -mno-fix-24k @gol
@@ -25503,6 +25504,15 @@ instructions to enable load/store bonding.  This 
option is enabled by
 default but only takes effect when the selected architecture is known
 to support bonding.
 
+@item -munaligned-access
+@itemx -mno-unaligned-access
+@opindex munaligned-access
+@opindex mno-unaligned-access
+Enable (disable) direct unaligned access for MIPS Release 6.
+MIPSr6 requires load/store unaligned-access support,
+by hardware or trap&emulate.
+So @option{-mno-unaligned-access} may be needed by kernel.
+
 @item -mmemcpy
 @itemx -mno-memcpy
 @opindex mmemcpy
diff --git a/gcc/testsuite/gcc.target/mips/mips.exp 
b/gcc/testsuite/gcc.target/mips/mips.exp
index d4d4b90d8..f76ab7adc 100644
--- a/gcc/testsuite/gcc.target/mips/mips.exp
+++ b/gcc/testsuite/gcc.target/mips/mips.exp
@@ -264,6 +264,7 @@ set mips_option_groups {
 frame-header "-mframe-header-opt|-mno-frame-header-opt"
 stack-protecto

[PATCH] opts: Remove AUTODETECT_VALUE usage.

2021-10-11 Thread Martin Liška


The patch is about using OPTION_SET_P instead of a default
option value set in common.opt.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/ChangeLog:

* common.opt: Remove Init(2) for some options.
* toplev.c (process_options): Do not use AUTODETECT_VALUE, but
use rather OPTION_SET_P.
---
 gcc/common.opt | 6 +++---
 gcc/toplev.c   | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/common.opt b/gcc/common.opt
index 59ecc9fbdf7..4099effcc80 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2399,7 +2399,7 @@ Common Var(flag_live_range_shrinkage) Init(0) Optimization
 Relief of register pressure through live range shrinkage.
 
 frename-registers

-Common Var(flag_rename_registers) Init(2) Optimization
+Common Var(flag_rename_registers) Optimization
 Perform a register renaming optimization pass.
 
 fschedule-fusion

@@ -2797,7 +2797,7 @@ Common Ignore
 Does nothing.  Preserved for backward compatibility.
 
 ftree-cselim

-Common Var(flag_tree_cselim) Init(2) Optimization
+Common Var(flag_tree_cselim) Optimization
 Transform condition stores into unconditional ones.
 
 ftree-switch-conversion

@@ -3158,7 +3158,7 @@ Common Var(flag_value_profile_transformations) 
Optimization
 Use expression value profiles in optimizations.
 
 fweb

-Common Var(flag_web) Init(2) Optimization
+Common Var(flag_web) Optimization
 Construct webs and split unrelated uses of single variable.
 
 ftree-builtin-call-dce

diff --git a/gcc/toplev.c b/gcc/toplev.c
index b878234f3f2..167feac2583 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -1332,10 +1332,10 @@ process_options (bool no_backend)
 }
 
   /* web and rename-registers help when run after loop unrolling.  */

-  if (flag_web == AUTODETECT_VALUE)
+  if (!OPTION_SET_P (flag_web))
 flag_web = flag_unroll_loops;
 
-  if (flag_rename_registers == AUTODETECT_VALUE)

+  if (!OPTION_SET_P (flag_rename_registers))
 flag_rename_registers = flag_unroll_loops;
 
   if (flag_non_call_exceptions)

@@ -1598,7 +1598,7 @@ process_options (bool no_backend)
   debug_inline_points = 0;
 }
 
-  if (flag_tree_cselim == AUTODETECT_VALUE)

+  if (!OPTION_SET_P (flag_tree_cselim))
 {
   if (HAVE_conditional_move)
flag_tree_cselim = 1;
--
2.33.0

[PATCH] Remove usage of IRA_REGION_AUTODETECT

2021-10-11 Thread Martin Liška


Similar patch, let's rely on OPTION_SET_P and not a default
options value.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/ChangeLog:

* common.opt: Remove usage of IRA_REGION_AUTODETECT.
* flag-types.h (enum ira_region): Likewise.
* toplev.c (process_options): Use OPTION_SET_P instead of
IRA_REGION_AUTODETECT.
---
 gcc/common.opt   | 2 +-
 gcc/flag-types.h | 4 
 gcc/toplev.c | 2 +-
 3 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/gcc/common.opt b/gcc/common.opt
index 52693e226d2..59ecc9fbdf7 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1923,7 +1923,7 @@ EnumValue
 Enum(ira_algorithm) String(priority) Value(IRA_ALGORITHM_PRIORITY)
 
 fira-region=

-Common Joined RejectNegative Enum(ira_region) Var(flag_ira_region) 
Init(IRA_REGION_AUTODETECT) Optimization
+Common Joined RejectNegative Enum(ira_region) Var(flag_ira_region) 
Init(IRA_REGION_ONE) Optimization
 -fira-region=[one|all|mixed]   Set regions for IRA.
 
 Enum

diff --git a/gcc/flag-types.h b/gcc/flag-types.h
index 5bd1f771c8b..ae0b216e8a3 100644
--- a/gcc/flag-types.h
+++ b/gcc/flag-types.h
@@ -191,10 +191,6 @@ enum ira_region
   IRA_REGION_ONE,
   IRA_REGION_ALL,
   IRA_REGION_MIXED,
-  /* This value means that there were no options -fira-region on the
- command line and that we should choose a value depending on the
- used -O option.  */
-  IRA_REGION_AUTODETECT
 };
 
 /* The options for excess precision.  */

diff --git a/gcc/toplev.c b/gcc/toplev.c
index 81748b1152a..b878234f3f2 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -1319,7 +1319,7 @@ process_options (bool no_backend)
 }
 
   /* One region RA really helps to decrease the code size.  */

-  if (flag_ira_region == IRA_REGION_AUTODETECT)
+  if (!OPTION_SET_P (flag_ira_region))
 flag_ira_region
   = optimize_size || !optimize ? IRA_REGION_ONE : IRA_REGION_MIXED;
 
--

2.33.0

Re: [SVE] [gimple-isel] PR93183 - SVE does not use neg as conditional

2021-10-11 Thread Prathamesh Kulkarni via Gcc-patches

On Fri, 8 Oct 2021 at 21:19, Richard Sandiford
 wrote:
>
> Thanks for looking at this.
>
> Prathamesh Kulkarni  writes:
> > Hi,
> > As mentioned in PR, for the following test-case:
> >
> > typedef unsigned char uint8_t;
> >
> > static inline uint8_t
> > x264_clip_uint8(uint8_t x)
> > {
> >   uint8_t t = -x;
> >   uint8_t t1 = x & ~63;
> >   return (t1 != 0) ? t : x;
> > }
> >
> > void
> > mc_weight(uint8_t *restrict dst, uint8_t *restrict src, int n)
> > {
> >   for (int x = 0; x < n*16; x++)
> > dst[x] = x264_clip_uint8(src[x]);
> > }
> >
> > -O3 -mcpu=generic+sve generates following code for the inner loop:
> >
> > .L3:
> > ld1bz0.b, p0/z, [x1, x2]
> > movprfx z2, z0
> > and z2.b, z2.b, #0xc0
> > movprfx z1, z0
> > neg z1.b, p1/m, z0.b
> > cmpeq   p2.b, p1/z, z2.b, #0
> > sel z0.b, p2, z0.b, z1.b
> > st1bz0.b, p0, [x0, x2]
> > add x2, x2, x4
> > whilelo p0.b, w2, w3
> > b.any   .L3
> >
> > The sel is redundant since we could conditionally negate z0 based on
> > the predicate
> > comparing z2 with 0.
> >
> > As suggested in the PR, the attached patch, introduces a new
> > conditional internal function .COND_NEG, and in gimple-isel replaces
> > the following sequence:
> >op2 = -op1
> >op0 = A cmp B
> >lhs = op0 ? op1 : op2
> >
> > with:
> >op0 = A inverted_cmp B
> >lhs = .COND_NEG (op0, op1, op1).
> >
> > lhs = .COND_NEG (op0, op1, op1)
> > implies
> > lhs = neg (op1) if cond is true OR fall back to op1 if cond is false.
> >
> > With patch, it generates the following code-gen:
> > .L3:
> > ld1bz0.b, p0/z, [x1, x2]
> > movprfx z1, z0
> > and z1.b, z1.b, #0xc0
> > cmpne   p1.b, p2/z, z1.b, #0
> > neg z0.b, p1/m, z0.b
> > st1bz0.b, p0, [x0, x2]
> > add x2, x2, x4
> > whilelo p0.b, w2, w3
> > b.any   .L3
> >
> > While it seems to work for this test-case, I am not entirely sure if
> > the patch is correct. Does it look in the right direction ?
>
> For binary ops we use match.pd rather than isel:
>
> (for uncond_op (UNCOND_BINARY)
>  cond_op (COND_BINARY)
>  (simplify
>   (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
>   (with { tree op_type = TREE_TYPE (@4); }
>(if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> && is_truth_type_for (op_type, TREE_TYPE (@0)))
> > (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
>  (simplify
>   (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
>   (with { tree op_type = TREE_TYPE (@4); }
>(if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> && is_truth_type_for (op_type, TREE_TYPE (@0)))
> > (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
>
> I think it'd be good to do the same here, using new (UN)COND_UNARY
> iterators.  (The iterators will only have one value to start with,
> but other unary ops could get the same treatment in future.)
Thanks for the suggestions.
The attached patch adds a pattern to match.pd to replace:
cond = a cmp b
r = cond ? x : -x
with:
cond = a inverted_cmp b
r = cond ? -x : x

Code-gen with patch for inner loop:
.L3:
ld1bz0.b, p0/z, [x1, x2]
movprfx z1, z0
and z1.b, z1.b, #0xc0
cmpne   p1.b, p2/z, z1.b, #0
neg z0.b, p1/m, z0.b
st1bz0.b, p0, [x0, x2]
add x2, x2, x4
whilelo p0.b, w2, w3
b.any   .L3

Does it look OK ?
I didn't add it under (UN)COND_UNARY since it inverts the comparison,
which we might not want to do for other unary ops ?

Also, I am not sure, how to test if target supports conditional
internal function ?
I tried to use:
(for cmp (tcc_comparison)
 icmp (inverted_tcc_comparison)
 (simplify
  (vec_cond (cmp@2 @0 @1) @3 (negate @3))
   (with { auto op_type = TREE_TYPE (@2); }
(if (vectorized_internal_fn_supported_p (IFN_COND_NEG, op_type)
 && is_truth_type_for (op_type, TREE_TYPE (@0)))
  (IFN_COND_NEG (icmp:op_type @0 @1) @3 @3)))))

but both the conditions seem to fail.

Thanks,
Prathamesh


>
> Richard
>
>
> >
> > Thanks,
> > Prathamesh
> >
> > diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
> > index 38e90933c3e..5b0dd3c1993 100644
> > --- a/gcc/gimple-isel.cc
> > +++ b/gcc/gimple-isel.cc
> > @@ -39,6 +39,8 @@ along with GCC; see the file COPYING3.  If not see
> >  #include "optabs.h"
> >  #include "gimple-fold.h"
> >  #include "internal-fn.h"
> > +#include "fold-const.h"
> > +#include "tree-pretty-print.h"
> >
> >  /* Expand all ARRAY_REF(VIEW_CONVERT_EXPR) gimple assignments into calls to
> > internal function based on vector type of selected expansion.
> > @@ -203,6 +205,35 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
> > return new_stmt;
> >   }
> >
> > +   /* Replace:
> > +  op2 = -op1
> > +  op0 = A cmp B
>

[PATCH] options: Fix variable tracking option processing.

2021-10-11 Thread Martin Liška


After the recent change in Optimize attribute handling, we need
finish_option function properly auto-detecting variable tracking options.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

PR debug/102585

gcc/ChangeLog:

* common.opt: Do not init flag_var_tracking* options.
* opts.c (finish_options): Handle flag_var_tracking* options.
* toplev.c (process_options): Move to opts.c.

gcc/testsuite/ChangeLog:

* gcc.dg/pr102585.c: New test.
---
 gcc/common.opt  | 14 +-
 gcc/opts.c  | 28 
 gcc/testsuite/gcc.dg/pr102585.c |  6 ++
 gcc/toplev.c| 33 +++--
 4 files changed, 42 insertions(+), 39 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr102585.c

diff --git a/gcc/common.opt b/gcc/common.opt
index 52693e226d2..ec020f4e642 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3003,19 +3003,16 @@ Common Undocumented Var(flag_use_linker_plugin)
 
 ; Positive if we should track variables, negative if we should run

 ; the var-tracking pass only to discard debug annotations, zero if
-; we're not to run it.  When flag_var_tracking == 2 (AUTODETECT_VALUE) it
-; will be set according to optimize, debug_info_level and debug_hooks
-; in process_options ().
+; we're not to run it.
 fvar-tracking
-Common Var(flag_var_tracking) Init(2) PerFunction
+Common Var(flag_var_tracking) PerFunction
 Perform variable tracking.
 
 ; Positive if we should track variables at assignments, negative if

 ; we should run the var-tracking pass only to discard debug
-; annotations.  When flag_var_tracking_assignments ==
-; AUTODETECT_VALUE it will be set according to flag_var_tracking.
+; annotations.
 fvar-tracking-assignments
-Common Var(flag_var_tracking_assignments) Init(2) PerFunction
+Common Var(flag_var_tracking_assignments) PerFunction
 Perform variable tracking by annotating assignments.
 
 ; Nonzero if we should toggle flag_var_tracking_assignments after

@@ -3026,8 +3023,7 @@ Toggle -fvar-tracking-assignments.
 
 ; Positive if we should track uninitialized variables, negative if

 ; we should run the var-tracking pass only to discard debug
-; annotations.  When flag_var_tracking_uninit == AUTODETECT_VALUE it
-; will be set according to flag_var_tracking.
+; annotations.
 fvar-tracking-uninit
 Common Var(flag_var_tracking_uninit) PerFunction
 Perform variable tracking and also tag variables that are uninitialized.
diff --git a/gcc/opts.c b/gcc/opts.c
index 2116c2991dd..eeb6b1dcc7c 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -1353,6 +1353,34 @@ finish_options (struct gcc_options *opts, struct 
gcc_options *opts_set,
 SET_OPTION_IF_UNSET (opts, opts_set, flag_vect_cost_model,
 VECT_COST_MODEL_CHEAP);
 
+  /* If the user specifically requested variable tracking with tagging

+ uninitialized variables, we need to turn on variable tracking.
+ (We already determined above that variable tracking is feasible.)  */
+  if (opts->x_flag_var_tracking_uninit == 1)
+opts->x_flag_var_tracking = 1;
+
+  if (!opts_set->x_flag_var_tracking)
+opts->x_flag_var_tracking = optimize >= 1;
+
+  if (!opts_set->x_flag_var_tracking_uninit)
+opts->x_flag_var_tracking_uninit = opts->x_flag_var_tracking;
+
+  if (!opts_set->x_flag_var_tracking_assignments)
+opts->x_flag_var_tracking_assignments
+  = (opts->x_flag_var_tracking
+&& !(opts->x_flag_selective_scheduling
+ || opts->x_flag_selective_scheduling2));
+
+  if (opts->x_flag_var_tracking_assignments_toggle)
+opts->x_flag_var_tracking_assignments = 
!opts->x_flag_var_tracking_assignments;
+
+  if (opts->x_flag_var_tracking_assignments && !opts->x_flag_var_tracking)
+opts->x_flag_var_tracking = opts->x_flag_var_tracking_assignments = -1;
+
+  if (opts->x_flag_var_tracking_assignments
+  && (opts->x_flag_selective_scheduling || 
opts->x_flag_selective_scheduling2))
+warning_at (loc, 0,
+   "var-tracking-assignments changes selective scheduling");
 }
 
 #define LEFT_COLUMN	27

diff --git a/gcc/testsuite/gcc.dg/pr102585.c b/gcc/testsuite/gcc.dg/pr102585.c
new file mode 100644
index 000..efd066b4a4e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr102585.c
@@ -0,0 +1,6 @@
+/* PR debug/102585 */
+/* { dg-do compile } */
+/* { dg-options "-fvar-tracking-assignments -fno-var-tracking" } */
+
+#pragma GCC optimize 0
+void d_demangle_callback_Og() { int c = 0; }
diff --git a/gcc/toplev.c b/gcc/toplev.c
index 81748b1152a..2f13d740b98 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -1490,8 +1490,8 @@ process_options (bool no_backend)
   || !dwarf_debuginfo_p ()
   || debug_hooks->var_location == do_nothing_debug_hooks.var_location)
 {
-  if (flag_var_tracking == 1
- || flag_var_tracking_uninit == 1)
+  if ((OPTION_SET_P (flag_var_tracking) && flag_var_trac

Re: [PATCH][i386] target: support spaces in target attribute.

2021-10-11 Thread Martin Liška


On 10/4/21 23:02, Andrew Pinski wrote:

It might be useful to skip tabs for the same reason as spaces really.


Sure, be my guest.

Martin

From b66d7be2c1b3ac286257e3df4d9796e391751bef Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Mon, 4 Oct 2021 14:06:14 +0200
Subject: [PATCH] target: support spaces in target attribute.

	PR target/102374

gcc/ChangeLog:

	* config/i386/i386-options.c (ix86_valid_target_attribute_inner_p): Strip whitespaces.
	* system.h (strip_whitespaces): New function.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr102374.c: New test.
---
 gcc/config/i386/i386-options.c   |  2 ++
 gcc/system.h | 21 +
 gcc/testsuite/gcc.target/i386/pr102374.c |  3 +++
 3 files changed, 26 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr102374.c

diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c
index e7a3bd4aaea..c9523b26f49 100644
--- a/gcc/config/i386/i386-options.c
+++ b/gcc/config/i386/i386-options.c
@@ -1146,6 +1146,8 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
 	  next_optstr = NULL;
 	}
 
+  p = strip_whitespaces (p, &len);
+
   /* Recognize no-xxx.  */
   if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
 	{
diff --git a/gcc/system.h b/gcc/system.h
index adde3e264b6..17a6a553b0b 100644
--- a/gcc/system.h
+++ b/gcc/system.h
@@ -1305,4 +1305,25 @@ startswith (const char *str, const char *prefix)
   return strncmp (str, prefix, strlen (prefix)) == 0;
 }
 
+/* Strip white spaces from STRING with LEN length.
+   A stripped string is returned and LEN is updated accordingly.  */
+
+static inline char *
+strip_whitespaces (char *string, size_t *len)
+{
+  while (string[0] == ' ' || string[0] == '\t')
+{
+  --(*len);
+  ++string;
+}
+
+  while (string[*len - 1] == ' ' || string[*len - 1] == '\t')
+{
+  string[*len - 1] = '\0';
+  --(*len);
+}
+
+  return string;
+}
+
 #endif /* ! GCC_SYSTEM_H */
diff --git a/gcc/testsuite/gcc.target/i386/pr102374.c b/gcc/testsuite/gcc.target/i386/pr102374.c
new file mode 100644
index 000..21aa76011ed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102374.c
@@ -0,0 +1,3 @@
+/* PR target/102374 */
+
+void calculate_sse(void) __attribute__ ((__target__ ("	no-avx, sse2   ")));
-- 
2.33.0

[PATCH] c++: Fix up push_local_extern_decl_alias error recovery [PR102642]

2021-10-11 Thread Jakub Jelinek via Gcc-patches

Hi!

My recent push_local_extern_decl_alias change broke error-recovery,
do_pushdecl can return error_mark_node and set_decl_tls_model can't be
called on that.  There are other code paths that store error_mark_node
into DECL_LOCAL_DECL_ALIAS, with the intent to differentiate the cases
where we haven't yet tried to push it into the namespace scope (NULL)
and one where we have tried it but it failed (error_mark_node), but looking
around, there are other spots where we call functions or do processing
which doesn't tolerate error_mark_node.

So, the first hunk with the testcase fixes the testcase, the others
fix what I've spotted and the fix was easy to figure out (there are I think
3 other spots mainly for function multiversioning).

Ok for trunk and 11.3 (where I've backported the tls fix before) if it
passes bootstrap/regtest?

2021-10-11  Jakub Jelinek  

PR c++/102642
* name-lookup.c (push_local_extern_decl_alias): Don't call
set_decl_tls_model on error_mark_node.
* decl.c (make_rtl_for_nonlocal_decl): Don't call
set_user_assembler_name on error_mark_node.
* parser.c (cp_parser_oacc_declare): Ignore DECL_LOCAL_DECL_ALIAS
if it is error_mark_node.
(cp_parser_omp_declare_target): Likewise.

* g++.dg/tls/pr102642.C: New test.

--- gcc/cp/name-lookup.c.jj 2021-10-01 10:30:07.674588541 +0200
+++ gcc/cp/name-lookup.c2021-10-11 12:43:39.261051228 +0200
@@ -3474,7 +3474,9 @@ push_local_extern_decl_alias (tree decl)
  push_nested_namespace (ns);
  alias = do_pushdecl (alias, /* hiding= */true);
  pop_nested_namespace (ns);
- if (VAR_P (decl) && CP_DECL_THREAD_LOCAL_P (decl))
+ if (VAR_P (decl)
+ && CP_DECL_THREAD_LOCAL_P (decl)
+ && alias != error_mark_node)
set_decl_tls_model (alias, DECL_TLS_MODEL (decl));
}
 }
--- gcc/cp/decl.c.jj2021-10-09 10:07:51.883704975 +0200
+++ gcc/cp/decl.c   2021-10-11 12:49:33.810977118 +0200
@@ -7373,7 +7373,8 @@ make_rtl_for_nonlocal_decl (tree decl, t
 This is horrible, as we're affecting a
 possibly-shared decl.  Again, a one-true-decl
 model breaks down.  */
- set_user_assembler_name (ns_decl, asmspec);
+ if (ns_decl != error_mark_node)
+   set_user_assembler_name (ns_decl, asmspec);
}
 }
 
--- gcc/cp/parser.c.jj  2021-10-09 10:14:24.043098112 +0200
+++ gcc/cp/parser.c 2021-10-11 12:47:21.220874667 +0200
@@ -44437,7 +44437,8 @@ cp_parser_oacc_declare (cp_parser *parse
   dependent local extern variable decls are as rare as
   hen's teeth.  */
if (auto alias = DECL_LOCAL_DECL_ALIAS (decl))
- decl = alias;
+ if (alias != error_mark_node)
+   decl = alias;
 
  if (OMP_CLAUSE_MAP_KIND (t) == GOMP_MAP_LINK)
id = get_identifier ("omp declare target link");
@@ -45665,7 +45666,8 @@ cp_parser_omp_declare_target (cp_parser
   if (VAR_OR_FUNCTION_DECL_P (t)
  && DECL_LOCAL_DECL_P (t)
  && DECL_LANG_SPECIFIC (t)
- && DECL_LOCAL_DECL_ALIAS (t))
+ && DECL_LOCAL_DECL_ALIAS (t)
+ && DECL_LOCAL_DECL_ALIAS (t) != error_mark_node)
handle_omp_declare_target_clause (c, DECL_LOCAL_DECL_ALIAS (t),
  device_type);
 }
--- gcc/testsuite/g++.dg/tls/pr102642.C.jj  2021-10-11 13:00:35.889503002 
+0200
+++ gcc/testsuite/g++.dg/tls/pr102642.C 2021-10-11 13:00:20.388724721 +0200
@@ -0,0 +1,10 @@
+// PR c++/102642
+// { dg-do compile { target c++11 } }
+
+thread_local int *z;   // { dg-message "previous declaration" }
+
+void
+foo ()
+{
+  extern thread_local int z;   // { dg-error "conflicting declaration" }
+}

Jakub

Re: [PATCH] x86_64: Some SUBREG related optimization tweaks to i386 backend.

2021-10-11 Thread Hongtao Liu via Gcc-patches

On Mon, Oct 11, 2021 at 4:55 PM Roger Sayle  wrote:
>
>
> This patch contains two SUBREG-related optimization enabling tweaks to
> the x86 backend.
>
> The first change, to ix86_expand_vector_extract, cures the strange
> -march=cascadelake related non-determinism that affected my new test
> cases last week.  Extracting a QImode or HImode element from an SSE
> vector performs a zero-extension to SImode, which is currently
> represented as:
>
> (set (subreg:SI (reg:QI target)) (zero_extend:SI (...))
>
> Unfortunately, the semantics of this RTL doesn't quite match what was
> intended.  A set of a paradoxical subreg allows the high-bits to take
> an arbitrary value (hence the non-determinism).  A more correct
> representation should be:
>
> (set (reg:SI temp) (zero_extend:SI (...))
> (set (reg:QI target) (subreg:QI (reg:SI temp))
>
> Optionally with the SUBREG rtx annotated as SUBREG_PROMOTED_VAR_P to
> indicate that value is already zero-extended in the SUBREG_REG.
>
> The second change is very similar, which is why I've included it in
> this patch, where currently the early RTL optimizers can produce:
>
> (set (reg:V?? hardreg) (subreg ...))
>
> where this instruction may require a spill/reload from memory when
> the modes aren't tieable.  Alas the presence of the hard register
> prevents combine/gcse etc. optimizing this away, or reusing the result
> which would increase the lifetime of the hard register before reload.
>
> The solution is to treat vector hard registers the same way as the
> x86 backend handles scalar hard registers, and only allow sets from
> pseudos before register allocation, which is achieved by checking
> ix86_hardreg_mov_ok.  Hence the above instruction is expanded and
> maintained as:
>
> (set (reg:V?? pseudo) (subreg ...))
> (set (reg:V?? hardreg) (reg:V?? pseudo))
>
> which allows the RTL optimizers freedom to optimize the SUBREG.
>
>
> This patch has been tested on x86_64-pc-linux-gnu with "make bootstrap"
> and "make -k check" with no new failures.  In theory, my recent "obvious"
> regexp fix to accommodate -march=cascadelake is no longer required, but
> there's no harm leaving the testsuite as it is.
>
> Ok for mainline?
>
>
> 2021-10-11  Roger Sayle  
>
> gcc/ChangeLog
> * config/i386/i386-expand.c (ix86_expand_vector_move):  Use a
> pseudo intermediate when moving a SUBREG into a hard register,
> by checking ix86_hardreg_mov_ok.

   /* Make operand1 a register if it isn't already.  */
   if (can_create_pseudo_p ()
-  && !register_operand (op0, mode)
-  && !register_operand (op1, mode))
+  && (!ix86_hardreg_mov_ok (op0, op1)
+  || (!register_operand (op0, mode)
+  && !register_operand (op1, mode))))
 {
   rtx tmp = ix86_gen_scratch_sse_rtx (GET_MODE (op0));

ix86_gen_scratch_sse_rtx probably returns a hard register, but here
you want a pseudo register.


> (ix86_expand_vector_extract): Store zero-extended SImode
> intermediate in a pseudo, then set target using a SUBREG_PROMOTED
> annotated subreg.
> * config/i386/sse.md (mov_internal): Prevent CSE creating
> complex (SUBREG) sets of (vector) hard registers before reload, by
> checking ix86_hardreg_mov_ok.
>
>
> Thanks in advance,
> Roger
> --
>


--
BR,
Hongtao

Re: [Patch] libgomp: Add tests for omp_atv_serialized and deprecate omp_atv_sequential.

2021-10-11 Thread Marcel Vollweiler


Hi Jakub,

Am 11.10.2021 um 11:49 schrieb Jakub Jelinek:

On Mon, Oct 11, 2021 at 11:40:54AM +0200, Marcel Vollweiler wrote:

libgomp: Add tests for omp_atv_serialized and deprecate omp_atv_sequential.

The variable omp_atv_sequential was replaced by omp_atv_serialized in OpenMP
5.1. This was already implemented by Jakub (C/C++, commit ea82325afec) and
Tobias (Fortran, commit fff15bad1ab).

This patch adds two tests to check if omp_atv_serialized is available (one test
for C/C++ and one for Fortran). Besides that omp_atv_sequential is marked as
deprecated in C/C++ and Fortran for OpenMP 5.1.

libgomp/ChangeLog:

 * allocator.c (omp_init_allocator): Replace omp_atv_sequential with
 omp_atv_serialized.
 * omp.h.in: Add deprecated flag for omp_atv_sequential.
 * omp_lib.f90.in: Add deprecated flag for omp_atv_sequential.
 * testsuite/libgomp.c-c++-common/alloc-10.c: New test.
 * testsuite/libgomp.fortran/alloc-12.f90: New test.


LGTM, except one nit.


--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c
+}
\ No newline at end of file


Please make sure the file ends with a newline before committing.


Changed :)



  Jakub



Thanks,
Marcel
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
libgomp: Add tests for omp_atv_serialized and deprecate omp_atv_sequential.

The variable omp_atv_sequential was replaced by omp_atv_serialized in OpenMP
5.1. This was already implemented by Jakub (C/C++, commit ea82325afec) and
Tobias (Fortran, commit fff15bad1ab).

This patch adds two tests to check if omp_atv_serialized is available (one test
for C/C++ and one for Fortran). Besides that omp_atv_sequential is marked as
deprecated in C/C++ and Fortran for OpenMP 5.1.

libgomp/ChangeLog:

* allocator.c (omp_init_allocator): Replace omp_atv_sequential with
omp_atv_serialized.
* omp.h.in: Add deprecated flag for omp_atv_sequential.
* omp_lib.f90.in: Add deprecated flag for omp_atv_sequential.
* testsuite/libgomp.c-c++-common/alloc-10.c: New test.
* testsuite/libgomp.fortran/alloc-12.f90: New test.

diff --git a/libgomp/allocator.c b/libgomp/allocator.c
index dce600f..deebb6a 100644
--- a/libgomp/allocator.c
+++ b/libgomp/allocator.c
@@ -82,7 +82,7 @@ omp_init_allocator (omp_memspace_handle_t memspace, int 
ntraits,
break;
  case omp_atv_contended:
  case omp_atv_uncontended:
- case omp_atv_sequential:
+ case omp_atv_serialized:
  case omp_atv_private:
data.sync_hint = traits[i].value;
break;
diff --git a/libgomp/omp.h.in b/libgomp/omp.h.in
index d75ee13..e57e192 100644
--- a/libgomp/omp.h.in
+++ b/libgomp/omp.h.in
@@ -157,7 +157,7 @@ typedef enum omp_alloctrait_value_t
   omp_atv_contended = 3,
   omp_atv_uncontended = 4,
   omp_atv_serialized = 5,
-  omp_atv_sequential = omp_atv_serialized,
+  omp_atv_sequential __GOMP_DEPRECATED_5_1 = omp_atv_serialized,
   omp_atv_private = 6,
   omp_atv_all = 7,
   omp_atv_thread = 8,
diff --git a/libgomp/omp_lib.f90.in b/libgomp/omp_lib.f90.in
index 1063eee..57766b5 100644
--- a/libgomp/omp_lib.f90.in
+++ b/libgomp/omp_lib.f90.in
@@ -810,7 +810,7 @@
 #endif
 
 #if _OPENMP >= 202011
-!GCC$ ATTRIBUTES DEPRECATED :: omp_proc_bind_master
+!GCC$ ATTRIBUTES DEPRECATED :: omp_proc_bind_master, omp_atv_sequential
 #endif
 
   end module omp_lib
diff --git a/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c 
b/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c
new file mode 100644
index 000..01ae150d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c
@@ -0,0 +1,25 @@
+#include <omp.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+const omp_alloctrait_t traits[]
+= { { omp_atk_alignment, 64 },
+{ omp_atk_sync_hint, omp_atv_serialized },
+{ omp_atk_fallback, omp_atv_null_fb } };
+
+int
+main ()
+{
+  omp_allocator_handle_t a;
+  int *volatile p;
+  a = omp_init_allocator (omp_default_mem_space, 3, traits);
+  if (a == omp_null_allocator)
+abort ();
+  p = (int *) omp_alloc (3072, a);
+  if ((((uintptr_t) p) % 64) != 0)
+abort ();
+  p[0] = 1;
+  p[3071 / sizeof (int)] = 2;
+  omp_free (p, a);
+  omp_destroy_allocator (a);
+}
diff --git a/libgomp/testsuite/libgomp.fortran/alloc-12.f90 
b/libgomp/testsuite/libgomp.fortran/alloc-12.f90
new file mode 100644
index 000..3d10959
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/alloc-12.f90
@@ -0,0 +1,28 @@
+! { dg-additional-options "-Wall -Wextra" }
+program main
+  use omp_lib
+  use ISO_C_Binding
+  implicit none (external, type)
+  type(c_ptr) :: p
+  integer, pointer, contiguous :: ip(:)
+  type (omp_alloctrait) :: traits(3)
+  integer (omp_allocator_handle_kind) :: a
+  integer (c_ptrdiff_t) :: iptr
+
+  traits = [omp_alloctrait (omp_atk_alignment,

Re: [Patch] libgomp: Add tests for omp_atv_serialized and deprecate omp_atv_sequential.

2021-10-11 Thread Jakub Jelinek via Gcc-patches

On Mon, Oct 11, 2021 at 01:28:11PM +0200, Marcel Vollweiler wrote:
> > > libgomp/ChangeLog:
> > > 
> > >  * allocator.c (omp_init_allocator): Replace omp_atv_sequential with
> > >  omp_atv_serialized.
> > >  * omp.h.in: Add deprecated flag for omp_atv_sequential.
> > >  * omp_lib.f90.in: Add deprecated flag for omp_atv_sequential.
> > >  * testsuite/libgomp.c-c++-common/alloc-10.c: New test.
> > >  * testsuite/libgomp.fortran/alloc-12.f90: New test.
> > 
> > LGTM, except one nit.
> > 
> > > --- /dev/null
> > > +++ b/libgomp/testsuite/libgomp.c-c++-common/alloc-10.c
> > > +}
> > > \ No newline at end of file
> > 
> > Please make sure the file ends with a newline before committing.
> 
> Changed :)

Thanks, just commit it, the above was meant as "please commit
with this nit fixed", I'm sorry if it wasn't obvious.

Jakub

Re: [Patch] Fortran: Various CLASS + assumed-rank fixed [PR102541]

2021-10-11 Thread Tobias Burnus


Hi Harald,

On 10.10.21 21:27, Harald Anlauf via Fortran wrote:

just some random remarks from initially browsing your patch.

Thanks for browsing the patch :-)

- leftover from debugging?

Yes.

- code that could be shortened/made slightly more readable:
...
Is there a reason to not use strcmp (comp->name, "_data") == 0?


Just (pre-mature) optimization. I think the latter is clearer; I will
change it.

Tobias


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955

RE: [PATCH]middle-end convert negate + right shift into compare greater.

2021-10-11 Thread Tamar Christina via Gcc-patches

Hi all,

Here's a new version of the patch.

> >>> " If an exceptional condition occurs during the evaluation of an
> >>> expression
> >> (that is, if the result is not mathematically defined or not in the
> >> range of representable values for its type), the behavior is undefined."
> >>>
> >>> So it should still be acceptable to do in this case.
> >>
> >> -fwrapv
> >
> > If I understand correctly, you're happy with this if I guard it on
> > !flag_wrapv?
> 
> I did some more digging.  Right shift of a negative value is IMP_DEF (not
> UNDEF - this keeps catching me out).  So yes, wrapping this with !wrapv
> would address my concern.
> 
> I've not reviewed the patch itself, though.  I've never even written a patch
> for match.pd, so don't feel qualified to do that.

No problem, thanks for catching this! I'm sure one of the Richards will review 
it when
they have a chance.

Bootstrapped Regtested on aarch64-none-linux-gnu,
x86_64-pc-linux-gnu and no regressions.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

* match.pd: New negate+shift pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/signbit-2.c: New test.
* gcc.dg/signbit-3.c: New test.
* gcc.target/aarch64/signbit-1.c: New test.

--- inline copy of patch ---

diff --git a/gcc/match.pd b/gcc/match.pd
index 
7d2a24dbc5e9644a09968f877e12a824d8ba1caa..3d48eda826f889483a83267409c3f278ee907b57
 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -826,6 +826,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 { tree utype = unsigned_type_for (type); }
 (convert (rshift (lshift (convert:utype @0) @2) @3))
 
+/* Fold (-x >> C) into x > 0 where C = precision(type) - 1.  */
+(for cst (INTEGER_CST VECTOR_CST)
+ (simplify
+  (rshift (negate:s @0) cst@1)
+   (if (!flag_wrapv)
+(with { tree ctype = TREE_TYPE (@0);
+   tree stype = TREE_TYPE (@1);
+   tree bt = truth_type_for (ctype); }
+ (switch
+  /* Handle scalar case.  */
+  (if (INTEGRAL_TYPE_P (ctype)
+  && !VECTOR_TYPE_P (ctype)
+  && !TYPE_UNSIGNED (ctype)
+  && canonicalize_math_after_vectorization_p ()
+  && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
+   (convert:bt (gt:bt @0 { build_zero_cst (stype); })))
+  /* Handle vector case with a scalar immediate.  */
+  (if (VECTOR_INTEGER_TYPE_P (ctype)
+  && !VECTOR_TYPE_P (stype)
+  && !TYPE_UNSIGNED (ctype)
+   && wi::eq_p (wi::to_wide (@1), TYPE_PRECISION (stype) - 1))
+   (convert:bt (gt:bt @0 { build_zero_cst (ctype); })))
+  /* Handle vector case with a vector immediate.   */
+  (if (VECTOR_INTEGER_TYPE_P (ctype)
+  && VECTOR_TYPE_P (stype)
+  && !TYPE_UNSIGNED (ctype)
+  && uniform_vector_p (@1))
+   (with { tree cst = vector_cst_elt (@1, 0);
+  tree t = TREE_TYPE (cst); }
+(if (wi::eq_p (wi::to_wide (cst), TYPE_PRECISION (t) - 1))
+ (convert:bt (gt:bt @0 { build_zero_cst (ctype); }))))))))))
+
 /* Fold (C1/X)*C2 into (C1*C2)/X.  */
 (simplify
  (mult (rdiv@3 REAL_CST@0 @1) REAL_CST@2)
diff --git a/gcc/testsuite/gcc.dg/signbit-2.c b/gcc/testsuite/gcc.dg/signbit-2.c
new file mode 100644
index 
..fc0157cbc5c7996b481f2998bc30176c96a669bb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/signbit-2.c
@@ -0,0 +1,19 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps -fdump-tree-optimized" } */
+
+#include <stdint.h>
+
+void fun1(int32_t *x, int n)
+{
+for (int i = 0; i < (n & -16); i++)
+  x[i] = (-x[i]) >> 31;
+}
+
+void fun2(int32_t *x, int n)
+{
+for (int i = 0; i < (n & -16); i++)
+  x[i] = (-x[i]) >> 30;
+}
+
+/* { dg-final { scan-tree-dump-times {\s+>\s+\{ 0, 0, 0, 0 \}} 1 optimized } } 
*/
+/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
diff --git a/gcc/testsuite/gcc.dg/signbit-3.c b/gcc/testsuite/gcc.dg/signbit-3.c
new file mode 100644
index 
..19e9c06c349b3287610f817628f00938ece60bf7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/signbit-3.c
@@ -0,0 +1,13 @@
+/* { dg-do assemble } */
+/* { dg-options "-O1 --save-temps -fdump-tree-optimized" } */
+
+#include <stdint.h>
+
+void fun1(int32_t *x, int n)
+{
+for (int i = 0; i < (n & -16); i++)
+  x[i] = (-x[i]) >> 31;
+}
+
+/* { dg-final { scan-tree-dump-times {\s+>\s+0;} 1 optimized } } */
+/* { dg-final { scan-tree-dump-not {\s+>>\s+31} optimized } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/signbit-1.c 
b/gcc/testsuite/gcc.target/aarch64/signbit-1.c
new file mode 100644
index 
..3ebfb0586f37de29cf58635b27fe48503714447e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/signbit-1.c
@@ -0,0 +1,18 @@
+/* { dg-do assemble } */
+/* { dg-options "-O3 --save-temps" } */
+
+#include <stdint.h>
+
+void fun1(int32_t *x, int n)
+{
+for (int i = 0; i < (n & -16); i++)
+  x[i] = (-x[i]) >> 31;
+}
+
+void fun2(int32_t *x, int n)
+{
+for (int i = 0; i < (n & -16); i++

Re: [PATCH] Adjust testcase for O2 vectorization enabling.

2021-10-11 Thread H.J. Lu via Gcc-patches

On Sun, Oct 10, 2021 at 10:19 PM liuhongt  wrote:
>
> gcc/testsuite/ChangeLog:
>
> PR middle-end/102669
> * gnat.dg/unroll1.adb: Add -fno-tree-vectorize.
> ---
>  gcc/testsuite/gnat.dg/unroll1.adb | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/gcc/testsuite/gnat.dg/unroll1.adb 
> b/gcc/testsuite/gnat.dg/unroll1.adb
> index 34d8a8f3f38..8b732dd8f44 100644
> --- a/gcc/testsuite/gnat.dg/unroll1.adb
> +++ b/gcc/testsuite/gnat.dg/unroll1.adb
> @@ -1,5 +1,5 @@
>  -- { dg-do compile }
> --- { dg-options "-O2 -funroll-all-loops -fdump-rtl-loop2_unroll-details 
> -fdump-tree-cunrolli-details" }
> +-- { dg-options "-O2 -funroll-all-loops -fno-tree-vectorize 
> -fdump-rtl-loop2_unroll-details -fdump-tree-cunrolli-details" }
>
>  package body Unroll1 is

Should no-unroll pragma work with -ftree-vectorize?


-- 
H.J.

Re: [PATCH] regcprop: Determine subreg offset depending on endianness [PR101260]

2021-10-11 Thread Stefan Schulze Frielinghaus via Gcc-patches

On Mon, Oct 11, 2021 at 09:38:36AM +0200, Richard Biener wrote:
> On Fri, Oct 8, 2021 at 1:31 PM Stefan Schulze Frielinghaus via
> Gcc-patches  wrote:
> >
> > gcc/ChangeLog:
> >
> > * regcprop.c (maybe_mode_change): Determine offset relative to
> > high or low part depending on endianness.
> >
> > Bootstrapped and regtested on IBM Z. Ok for mainline and gcc-{11,10,9}?
> 
> Is there a testcase to add?

I've updated the patch and added the testcase from the PR.

> 
> > ---
> >  gcc/regcprop.c | 11 ---
> >  1 file changed, 8 insertions(+), 3 deletions(-)
> >
> > diff --git a/gcc/regcprop.c b/gcc/regcprop.c
> > index d2a01130fe1..0e1ac12458a 100644
> > --- a/gcc/regcprop.c
> > +++ b/gcc/regcprop.c
> > @@ -414,9 +414,14 @@ maybe_mode_change (machine_mode orig_mode, 
> > machine_mode copy_mode,
> > copy_nregs, &bytes_per_reg))
> > return NULL_RTX;
> >poly_uint64 copy_offset = bytes_per_reg * (copy_nregs - use_nregs);
> > -  poly_uint64 offset
> > -   = subreg_size_lowpart_offset (GET_MODE_SIZE (new_mode) + 
> > copy_offset,
> > - GET_MODE_SIZE (orig_mode));
> > +  poly_uint64 offset =
> > +#if WORDS_BIG_ENDIAN
> > +   subreg_size_highpart_offset
> > +#else
> > +   subreg_size_lowpart_offset
> > +#endif
> > +   (GET_MODE_SIZE (new_mode) + copy_offset,
> > +GET_MODE_SIZE (orig_mode));
> >regno += subreg_regno_offset (regno, orig_mode, offset, new_mode);
> >if (targetm.hard_regno_mode_ok (regno, new_mode))
> > return gen_raw_REG (new_mode, regno);
> > --
> > 2.31.1
> >
>From 299959788321e21c27f0d4a6d437a586c5f6c92e Mon Sep 17 00:00:00 2001
From: Stefan Schulze Frielinghaus 
Date: Mon, 4 Oct 2021 09:36:21 +0200
Subject: [PATCH] regcprop: Determine subreg offset depending on endianness
 [PR101260]

gcc/ChangeLog:

* regcprop.c (maybe_mode_change): Determine offset relative to
high or low part depending on endianness.

gcc/testsuite/ChangeLog:

* gcc.dg/pr101260.c: New test.
---
 gcc/regcprop.c  | 11 ++--
 gcc/testsuite/gcc.dg/pr101260.c | 49 +
 2 files changed, 57 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr101260.c

diff --git a/gcc/regcprop.c b/gcc/regcprop.c
index d2a01130fe1..0e1ac12458a 100644
--- a/gcc/regcprop.c
+++ b/gcc/regcprop.c
@@ -414,9 +414,14 @@ maybe_mode_change (machine_mode orig_mode, machine_mode 
copy_mode,
copy_nregs, &bytes_per_reg))
return NULL_RTX;
   poly_uint64 copy_offset = bytes_per_reg * (copy_nregs - use_nregs);
-  poly_uint64 offset
-   = subreg_size_lowpart_offset (GET_MODE_SIZE (new_mode) + copy_offset,
- GET_MODE_SIZE (orig_mode));
+  poly_uint64 offset =
+#if WORDS_BIG_ENDIAN
+   subreg_size_highpart_offset
+#else
+   subreg_size_lowpart_offset
+#endif
+   (GET_MODE_SIZE (new_mode) + copy_offset,
+GET_MODE_SIZE (orig_mode));
   regno += subreg_regno_offset (regno, orig_mode, offset, new_mode);
   if (targetm.hard_regno_mode_ok (regno, new_mode))
return gen_raw_REG (new_mode, regno);
diff --git a/gcc/testsuite/gcc.dg/pr101260.c b/gcc/testsuite/gcc.dg/pr101260.c
new file mode 100644
index 000..0e9ec4e203a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr101260.c
@@ -0,0 +1,49 @@
+/* PR rtl-optimization/101260 */
+/* { dg-do run } */
+/* { dg-options -O1 } */
+struct a {
+  unsigned b : 7;
+  int c;
+  int d;
+  short e;
+} p, *q = &p;
+int f, g, h, i, r, s;
+static short j[8][1][6] = {0};
+char k[7];
+short l, m;
+int *n;
+int **o = &n;
+void t() {
+  for (; f;)
+;
+}
+static struct a u(int x) {
+  struct a a = {4, 8, 5, 4};
+  for (; i <= 6; i++) {
+struct a v = {0};
+for (; l; l++)
+  h = 0;
+for (; h >= 0; h--) {
+  struct a *w;
+  j[i];
+  w = &p;
+  s = 0;
+  for (; s < 3; s++) {
+r ^= x;
+m = j[i][g][h] == (k[g] = g);
+*w = v;
+  }
+  r = 2;
+  for (; r; r--)
+*o = &r;
+}
+  }
+  t();
+  return a;
+}
+int main() {
+  *q = u(636);
+  if (p.b != 4)
+__builtin_abort ();
+  return 0;
+}
-- 
2.31.1

[PATCH] vectorizer: Fix up -fsimd-cost-model= handling

2021-10-11 Thread Jakub Jelinek via Gcc-patches

On Mon, Oct 11, 2021 at 11:28:55AM +0200, Jakub Jelinek via Gcc-patches wrote:
> I don't think this is the right thing to do.
> This just means that at some point between 2013 when -fsimd-cost-model has
> been introduced and now -fsimd-cost-model= option at least partially stopped
> working properly.
> As documented, -fsimd-cost-model= overrides the -fvect-cost-model= setting
> for OpenMP simd loops (loop->force_vectorize is true) if specified differently
> from default.
> In tree-vectorizer.h we have:
> static inline bool
> unlimited_cost_model (loop_p loop)
> {
>   if (loop != NULL && loop->force_vectorize
>   && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT)
> return flag_simd_cost_model == VECT_COST_MODEL_UNLIMITED;
>   return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED);
> }
> and use it in various places, but we also just use flag_vect_cost_model
> in lots of places (and in one spot use flag_simd_cost_model, not sure if
> we are sure it is a force_vectorize loop or what).
> 
> So, IMHO we should change the above inline function to
> loop_cost_model and let it return the cost model and then just
> reimplement unlimited_cost_model as
> return loop_cost_model (loop) == VECT_COST_MODEL_UNLIMITED;
> and then adjust the direct uses of the flag and revert these changes.

Here is a patch that implements it.
Ok for trunk if it passes bootstrap/regtest?

2021-10-11  Jakub Jelinek  

gcc/
* tree-vectorizer.h (loop_cost_model): New function.
(unlimited_cost_model): Use it.
* tree-vect-loop.c (vect_analyze_loop_costing): Use loop_cost_model
call instead of flag_vect_cost_model.
* tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Likewise.
(vect_prune_runtime_alias_test_list): Likewise.  Also use it instead
of flag_simd_cost_model.
libgomp/
* testsuite/libgomp.c/scan-11.c: Remove option -fvect-cost-model=cheap.
* testsuite/libgomp.c/scan-12.c: Likewise.
* testsuite/libgomp.c/scan-13.c: Likewise.
* testsuite/libgomp.c/scan-14.c: Likewise.
* testsuite/libgomp.c/scan-15.c: Likewise.
* testsuite/libgomp.c/scan-16.c: Likewise.
* testsuite/libgomp.c/scan-17.c: Likewise.
* testsuite/libgomp.c/scan-18.c: Likewise.
* testsuite/libgomp.c/scan-19.c: Likewise.
* testsuite/libgomp.c/scan-20.c: Likewise.
* testsuite/libgomp.c/scan-21.c: Likewise.
* testsuite/libgomp.c/scan-22.c: Likewise.
* testsuite/libgomp.c++/scan-9.C: Likewise.
* testsuite/libgomp.c++/scan-10.C: Likewise.
* testsuite/libgomp.c++/scan-11.C: Likewise.
* testsuite/libgomp.c++/scan-12.C: Likewise.
* testsuite/libgomp.c++/scan-13.C: Likewise.
* testsuite/libgomp.c++/scan-14.C: Likewise.
* testsuite/libgomp.c++/scan-15.C: Likewise.
* testsuite/libgomp.c++/scan-16.C: Likewise.

--- gcc/tree-vectorizer.h.jj2021-09-27 10:47:15.839084866 +0200
+++ gcc/tree-vectorizer.h   2021-10-11 13:46:55.169767481 +0200
@@ -1701,14 +1701,22 @@ get_dr_vinfo_offset (vec_info *vinfo,
 }
 
 
+/* Return the vect cost model for LOOP.  */
+static inline enum vect_cost_model
+loop_cost_model (loop_p loop)
+{
+  if (loop != NULL
+  && loop->force_vectorize
+  && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT)
+return flag_simd_cost_model;
+  return flag_vect_cost_model;
+}
+
 /* Return true if the vect cost model is unlimited.  */
 static inline bool
 unlimited_cost_model (loop_p loop)
 {
-  if (loop != NULL && loop->force_vectorize
-  && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT)
-return flag_simd_cost_model == VECT_COST_MODEL_UNLIMITED;
-  return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED);
+  return loop_cost_model (loop) == VECT_COST_MODEL_UNLIMITED;
 }
 
 /* Return true if the loop described by LOOP_VINFO is fully-masked and
--- gcc/tree-vect-loop.c.jj 2021-09-22 09:25:15.199030463 +0200
+++ gcc/tree-vect-loop.c2021-10-11 13:48:33.183366790 +0200
@@ -1850,7 +1850,7 @@ vect_analyze_loop_costing (loop_vec_info
 
   /* If using the "very cheap" model. reject cases in which we'd keep
  a copy of the scalar code (even if we might be able to vectorize it).  */
-  if (flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP
+  if (loop_cost_model (loop) == VECT_COST_MODEL_VERY_CHEAP
   && (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
  || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
  || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)))
@@ -1922,7 +1922,7 @@ vect_analyze_loop_costing (loop_vec_info
   /* If the vector loop needs multiple iterations to be beneficial then
  things are probably too close to call, and the conservative thing
  would be to stick with the scalar code.  */
-  if (flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP
+  if (loop_cost_model (loop) == VECT_COST_MODEL_VERY_CHEAP
   && min_profitable_estimate > (int) vect_vf_for_cost (loop_vinfo))
 {
   i

Re: [PATCH] vectorizer: Fix up -fsimd-cost-model= handling

2021-10-11 Thread Richard Biener via Gcc-patches

On Mon, 11 Oct 2021, Jakub Jelinek wrote:

> On Mon, Oct 11, 2021 at 11:28:55AM +0200, Jakub Jelinek via Gcc-patches wrote:
> > I don't think this is the right thing to do.
> > This just means that at some point between 2013 when -fsimd-cost-model has
> > been introduced and now -fsimd-cost-model= option at least partially stopped
> > working properly.
> > As documented, -fsimd-cost-model= overrides the -fvect-cost-model= setting
> > for OpenMP simd loops (loop->force_vectorize is true) if specified 
> > differently
> > from default.
> > In tree-vectorizer.h we have:
> > static inline bool
> > unlimited_cost_model (loop_p loop)
> > {
> >   if (loop != NULL && loop->force_vectorize
> >   && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT)
> > return flag_simd_cost_model == VECT_COST_MODEL_UNLIMITED;
> >   return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED);
> > }
> > and use it in various places, but we also just use flag_vect_cost_model
> > in lots of places (and in one spot use flag_simd_cost_model, not sure if
> > we are sure it is a force_vectorize loop or what).
> > 
> > So, IMHO we should change the above inline function to
> > loop_cost_model and let it return the cost model and then just
> > reimplement unlimited_cost_model as
> > return loop_cost_model (loop) == VECT_COST_MODEL_UNLIMITED;
> > and then adjust the direct uses of the flag and revert these changes.
> 
> Here is a patch that implements it.
> Ok for trunk if it passes bootstrap/regtest?

OK.

I'll note that we have #pragma GCC vect set force_vectorize as well
so we'd eventually want the cost model to be used stored in struct loop?
I suppose different -fvect-cost-model settings also prevent inlining
at the moment (and we could likely handle more opt settings as to be
reflected into struct loop to lessen issues like that).

Thanks,
Richard.

> 2021-10-11  Jakub Jelinek  
> 
> gcc/
>   * tree-vectorizer.h (loop_cost_model): New function.
>   (unlimited_cost_model): Use it.
>   * tree-vect-loop.c (vect_analyze_loop_costing): Use loop_cost_model
>   call instead of flag_vect_cost_model.
>   * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Likewise.
>   (vect_prune_runtime_alias_test_list): Likewise.  Also use it instead
>   of flag_simd_cost_model.
> libgomp/
>   * testsuite/libgomp.c/scan-11.c: Remove option -fvect-cost-model=cheap.
>   * testsuite/libgomp.c/scan-12.c: Likewise.
>   * testsuite/libgomp.c/scan-13.c: Likewise.
>   * testsuite/libgomp.c/scan-14.c: Likewise.
>   * testsuite/libgomp.c/scan-15.c: Likewise.
>   * testsuite/libgomp.c/scan-16.c: Likewise.
>   * testsuite/libgomp.c/scan-17.c: Likewise.
>   * testsuite/libgomp.c/scan-18.c: Likewise.
>   * testsuite/libgomp.c/scan-19.c: Likewise.
>   * testsuite/libgomp.c/scan-20.c: Likewise.
>   * testsuite/libgomp.c/scan-21.c: Likewise.
>   * testsuite/libgomp.c/scan-22.c: Likewise.
>   * testsuite/libgomp.c++/scan-9.C: Likewise.
>   * testsuite/libgomp.c++/scan-10.C: Likewise.
>   * testsuite/libgomp.c++/scan-11.C: Likewise.
>   * testsuite/libgomp.c++/scan-12.C: Likewise.
>   * testsuite/libgomp.c++/scan-13.C: Likewise.
>   * testsuite/libgomp.c++/scan-14.C: Likewise.
>   * testsuite/libgomp.c++/scan-15.C: Likewise.
>   * testsuite/libgomp.c++/scan-16.C: Likewise.
> 
> --- gcc/tree-vectorizer.h.jj  2021-09-27 10:47:15.839084866 +0200
> +++ gcc/tree-vectorizer.h 2021-10-11 13:46:55.169767481 +0200
> @@ -1701,14 +1701,22 @@ get_dr_vinfo_offset (vec_info *vinfo,
>  }
>  
>  
> +/* Return the vect cost model for LOOP.  */
> +static inline enum vect_cost_model
> +loop_cost_model (loop_p loop)
> +{
> +  if (loop != NULL
> +  && loop->force_vectorize
> +  && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT)
> +return flag_simd_cost_model;
> +  return flag_vect_cost_model;
> +}
> +
>  /* Return true if the vect cost model is unlimited.  */
>  static inline bool
>  unlimited_cost_model (loop_p loop)
>  {
> -  if (loop != NULL && loop->force_vectorize
> -  && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT)
> -return flag_simd_cost_model == VECT_COST_MODEL_UNLIMITED;
> -  return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED);
> +  return loop_cost_model (loop) == VECT_COST_MODEL_UNLIMITED;
>  }
>  
>  /* Return true if the loop described by LOOP_VINFO is fully-masked and
> --- gcc/tree-vect-loop.c.jj   2021-09-22 09:25:15.199030463 +0200
> +++ gcc/tree-vect-loop.c  2021-10-11 13:48:33.183366790 +0200
> @@ -1850,7 +1850,7 @@ vect_analyze_loop_costing (loop_vec_info
>  
>/* If using the "very cheap" model. reject cases in which we'd keep
>   a copy of the scalar code (even if we might be able to vectorize it).  
> */
> -  if (flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP
> +  if (loop_cost_model (loop) == VECT_COST_MODEL_VERY_CHEAP
>&& (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
> || LOOP_VINFO_PEELI

Re: [PATCH] vectorizer: Fix up -fsimd-cost-model= handling

2021-10-11 Thread Hongtao Liu via Gcc-patches

On Monday, October 11, 2021, Jakub Jelinek via Gcc-patches <
gcc-patches@gcc.gnu.org> wrote:

> On Mon, Oct 11, 2021 at 11:28:55AM +0200, Jakub Jelinek via Gcc-patches
> wrote:
> > I don't think this is the right thing to do.
> > This just means that at some point between 2013 when -fsimd-cost-model
> has
> > been introduced and now -fsimd-cost-model= option at least partially
> stopped
> > working properly.
> > As documented, -fsimd-cost-model= overrides the -fvect-cost-model=
> setting
> > for OpenMP simd loops (loop->force_vectorize is true) if specified
> differently
> > from default.
> > In tree-vectorizer.h we have:
> > static inline bool
> > unlimited_cost_model (loop_p loop)
> > {
> >   if (loop != NULL && loop->force_vectorize
> >   && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT)
> > return flag_simd_cost_model == VECT_COST_MODEL_UNLIMITED;
> >   return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED);
> > }
> > and use it in various places, but we also just use flag_vect_cost_model
> > in lots of places (and in one spot use flag_simd_cost_model, not sure if
> > we are sure it is a force_vectorize loop or what).
> >
> > So, IMHO we should change the above inline function to
> > loop_cost_model and let it return the cost model and then just
> > reimplement unlimited_cost_model as
> > return loop_cost_model (loop) == VECT_COST_MODEL_UNLIMITED;
> > and then adjust the direct uses of the flag and revert these changes.
>
> Here is a patch that implements it.
> Ok for trunk if it passes bootstrap/regtest?
>
> 2021-10-11  Jakub Jelinek  
>
> gcc/
> * tree-vectorizer.h (loop_cost_model): New function.
> (unlimited_cost_model): Use it.
> * tree-vect-loop.c (vect_analyze_loop_costing): Use loop_cost_model
> call instead of flag_vect_cost_model.
> * tree-vect-data-refs.c (vect_enhance_data_refs_alignment):
> Likewise.
> (vect_prune_runtime_alias_test_list): Likewise.  Also use it
> instead
> of flag_simd_cost_model.
> libgomp/
> * testsuite/libgomp.c/scan-11.c: Remove option
> -fvect-cost-model=cheap.
> * testsuite/libgomp.c/scan-12.c: Likewise.
> * testsuite/libgomp.c/scan-13.c: Likewise.
> * testsuite/libgomp.c/scan-14.c: Likewise.
> * testsuite/libgomp.c/scan-15.c: Likewise.
> * testsuite/libgomp.c/scan-16.c: Likewise.
> * testsuite/libgomp.c/scan-17.c: Likewise.
> * testsuite/libgomp.c/scan-18.c: Likewise.
> * testsuite/libgomp.c/scan-19.c: Likewise.
> * testsuite/libgomp.c/scan-20.c: Likewise.
> * testsuite/libgomp.c/scan-21.c: Likewise.
> * testsuite/libgomp.c/scan-22.c: Likewise.
> * testsuite/libgomp.c++/scan-9.C: Likewise.
> * testsuite/libgomp.c++/scan-10.C: Likewise.
> * testsuite/libgomp.c++/scan-11.C: Likewise.
> * testsuite/libgomp.c++/scan-12.C: Likewise.
> * testsuite/libgomp.c++/scan-13.C: Likewise.
> * testsuite/libgomp.c++/scan-14.C: Likewise.
> * testsuite/libgomp.c++/scan-15.C: Likewise.
> * testsuite/libgomp.c++/scan-16.C: Likewise.


Also for gcc.dg/gomp/simd-2.c,  gcc.dg/gomp/simd-3.c

--- gcc/tree-vectorizer.h.jj2021-09-27 10:47:15.839084866 +0200
> +++ gcc/tree-vectorizer.h   2021-10-11 13:46:55.169767481 +0200
> @@ -1701,14 +1701,22 @@ get_dr_vinfo_offset (vec_info *vinfo,
>  }
>
>
> +/* Return the vect cost model for LOOP.  */
> +static inline enum vect_cost_model
> +loop_cost_model (loop_p loop)
> +{
> +  if (loop != NULL
> +  && loop->force_vectorize
> +  && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT)
> +return flag_simd_cost_model;
> +  return flag_vect_cost_model;
> +}
> +
>  /* Return true if the vect cost model is unlimited.  */
>  static inline bool
>  unlimited_cost_model (loop_p loop)
>  {
> -  if (loop != NULL && loop->force_vectorize
> -  && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT)
> -return flag_simd_cost_model == VECT_COST_MODEL_UNLIMITED;
> -  return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED);
> +  return loop_cost_model (loop) == VECT_COST_MODEL_UNLIMITED;
>  }
>
>  /* Return true if the loop described by LOOP_VINFO is fully-masked and
> --- gcc/tree-vect-loop.c.jj 2021-09-22 09:25:15.199030463 +0200
> +++ gcc/tree-vect-loop.c2021-10-11 13:48:33.183366790 +0200
> @@ -1850,7 +1850,7 @@ vect_analyze_loop_costing (loop_vec_info
>
>/* If using the "very cheap" model. reject cases in which we'd keep
>   a copy of the scalar code (even if we might be able to vectorize
> it).  */
> -  if (flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP
> +  if (loop_cost_model (loop) == VECT_COST_MODEL_VERY_CHEAP
>&& (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
>   || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
>   || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)))
> @@ -1922,7 +1922,7 @@ vect_analyze_loop_costing (loop_vec_info
>/* If the vector loop needs multiple it

Re: [Patch 1/7, Arm, GCC] Add Armv8.1-M Mainline target feature +pacbti.

2021-10-11 Thread Richard Earnshaw via Gcc-patches


On 08/10/2021 13:17, Tejas Belagod via Gcc-patches wrote:

Hi,

This patch adds the -march feature +pacbti to Armv8.1-M Mainline.
This feature enables pointer signing and authentication instructions
on M-class architectures.

Tested on arm-none-eabi. OK for trunk?

2021-10-04  Tejas Belagod  

gcc/Changelog:

* config/arm/arm-cpus.in: Define new feature pacbti.
* config/arm/arm.h (TARGET_HAVE_PACBTI): New.



"+pacbti" needs to be documented in invoke.texi at the appropriate place.

R.

[RFC] Port git gcc-descr to Python

2021-10-11 Thread Martin Liška


On 10/8/21 13:12, Jakub Jelinek wrote:

I think changing the default is ok, but dropping --full is not,
it should stay and behave the way it did before (i.e. print
r12-4245-gdb3d7270b42fe27fb05664c4fdf524ab7ad13a75
same thing as the new default except for full hash instead of
first 14 chars from it).


All right, makes sense, but updating the git alias doesn't work for me.
Thus I'm suggesting a Python replacement of it, which does:

$ ./describe.py basepoints/gcc-11

r11-0-g50ee04838efc16


$ ./describe.py

r12-4285-g07dd3bcda17f97


$ ./describe.py --short

r12-4285


$ ./describe.py --full

r12-4285-g07dd3bcda17f97cf5476c3d6f2f2501c1e0712e6


Motivation behind usage of Python:
- the script can be put to contrib and nobody would have to re-run 
contrib/gcc-git-customization.sh locally
- it's not a single line script and one doesn't have to do bash escaping in the 
contrib/gcc-git-customization.sh script
- positional argument support

Thoughts?

Note I can port the same way the counterpart script.
One more note:

$ git gcc-descr basepoints/gcc-11

r11-0


seems bogus to me.

Cheers,
Martin

#!/usr/bin/env python3

import argparse
import subprocess
import sys

DEFAULT_REV = 'master'
PREFIX = 'basepoints/gcc-'
hash_length = 14


def run_git(cmd):
    """Run CMD through the shell and return the completed process.

    CMD is a single command-line string.  Both stdout and stderr are
    captured and decoded as UTF-8, so callers can inspect `returncode`,
    `stdout` and `stderr` on the returned `subprocess.CompletedProcess`.
    The command's exit status is not checked here; callers decide how to
    react to a failure.
    """
    # NOTE(review): shell=True means CMD is interpreted by the shell, so
    # callers must quote anything that should not be expanded.
    return subprocess.run(cmd, shell=True, encoding='utf8',
                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)


# Command-line interface: an optional revision plus --full / --short.
parser = argparse.ArgumentParser(description='Describe a GCC git commit.')
parser.add_argument('revision', nargs='?', default=DEFAULT_REV,
                    help=f'Described revision ("{DEFAULT_REV}" by default)')
parser.add_argument('--full', '-f', action='store_true',
                    help='Print complete git hash')
parser.add_argument('--short', '-s', action='store_true',
                    help='Shorten described revision')
args = parser.parse_args()

if args.full:
    hash_length = 40

# Describe the revision relative to the basepoint tags.  The glob is quoted
# so that the shell passes it to git verbatim instead of expanding it
# against files in the current directory.
r = run_git(f'git describe --all --match "{PREFIX}[0-9]*" '
            f'{args.revision} --abbrev={hash_length}')
if r.returncode != 0:
    print(r.stderr, end='')
    sys.exit(1)

# produces e.g. r12-4285-g07dd3bcda17f97
descr = r.stdout.strip()
assert PREFIX in descr
descr = 'r' + descr[descr.find(PREFIX) + len(PREFIX):]

# handle basepoints: the description has no "-N-gHASH" suffix when the
# revision is the basepoint itself, so append one with a zero distance.
if '-' not in descr:
    r = run_git(f'git rev-parse {args.revision}')
    if r.returncode != 0:
        print(r.stderr, end='')
        sys.exit(1)
    descr += '-0-g' + r.stdout.strip()[:hash_length]

parts = descr.split('-')
assert len(parts) == 3

if args.short:
    # Drop the trailing "gHASH" component.
    descr = '-'.join(parts[:-1])

# verify common ancestor
r = run_git('git config --get gcc-config.upstream')
# Use the configured upstream remote when the lookup succeeds (exit status
# 0); otherwise fall back to "origin".
upstream = r.stdout.strip() if r.returncode == 0 else 'origin'
gcc_branch = parts[0][1:]

# Use the release branch of the upstream remote if it exists, else master.
r = run_git('git rev-parse --quiet --verify '
            f'{upstream}/releases/gcc-{gcc_branch}')
branch = f'releases/gcc-{gcc_branch}' if r.returncode == 0 else 'master'

r = run_git(f'git merge-base --is-ancestor {args.revision} '
            f'{upstream}/{branch}')
if r.returncode != 0:
    print(r.stderr, end='')
    sys.exit(2)

print(descr)

Re: [PATCH 03/13] arm: Add test for PR target/101325

2021-10-11 Thread Christophe LYON via Gcc-patches



On 28/09/2021 15:30, Christophe LYON via Gcc-patches wrote:


On 28/09/2021 13:14, Kyrylo Tkachov wrote:



-Original Message-
From: Gcc-patches  On Behalf Of Christophe
Lyon via Gcc-patches
Sent: 07 September 2021 10:15
To: gcc-patches@gcc.gnu.org
Subject: [PATCH 03/13] arm: Add test for PR target/101325

This test is derived from the one provided in the PR: it is a
compile-only test because I do not have access to anything that could
execute it.  We can switch it to 'dg-do run' later, however it would
be better to write a new executable test to ensure coverage in case
the tester cannot execute such code (and it will need a new
arm_v8_1m_mve_hw or similar effective-target).

The test is okay for now.
I think we'll want to have a arm_v8_1m_mve_hw target sooner or later.
Maybe Alex or Andrea can help to write one we can use?



Since I posted the patch series, QEMU has gained support for MVE, I 
plan to write a similar testcase which is executable.


There's already an executable testcase in the PR.

Thanks

Christophe


Here is an updated version of this patch, which adds an executable test.

I thought I would re-post the whole series later, but I haven't yet 
received feedback on the main patches, which I expect to trigger some 
discussions.


Christophe





Thanks,
Kyrill


2021-09-01  Christophe Lyon 

gcc/testsuite/
PR target/101325
* gcc.target/arm/simd/pr101325.c: New.

diff --git a/gcc/testsuite/gcc.target/arm/simd/pr101325.c
b/gcc/testsuite/gcc.target/arm/simd/pr101325.c
new file mode 100644
index 000..a466683a0b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/pr101325.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O3" } */
+
+#include <arm_mve.h>
+
+unsigned foo(int8x16_t v, int8x16_t w)
+{
+  return vcmpeqq (v, w);
+}
+/* { dg-final { scan-assembler {\tvcmp.i8  eq} } } */
+/* { dg-final { scan-assembler {\tvmrs\t r[0-9]+, P0} } } */
+/* { dg-final { scan-assembler {\tuxth} } } */
--
2.25.1From ef48339f8048ee6417845ed2e6fd95f550ee798e Mon Sep 17 00:00:00 2001
From: Christophe Lyon 
Date: Wed, 25 Aug 2021 17:26:31 +
Subject: [PATCH v2 03/14] arm: Add tests for PR target/101325

These tests are derived from the one provided in the PR: there is a
compile-only test because I did not have access to anything that could
execute MVE code until recently.
I have been able to add an executable test since QEMU supports MVE.

Instead of adding arm_v8_1m_mve_hw, I update arm_mve_hw so that it
uses add_options_for_arm_v8_1m_mve_fp, like arm_neon_hw does.  This
ensures arm_mve_hw passes even if the toolchain does not generate MVE
code by default.

2021-10-01  Christophe Lyon  

gcc/testsuite/
PR target/101325
* gcc.target/arm/simd/pr101325.c: New.
* gcc.target/arm/simd/pr101325-2.c: New.
* lib/target-supports.exp (check_effective_target_arm_mve_hw): Use
add_options_for_arm_v8_1m_mve_fp.

add executable test and update check_effective_target_arm_mve_hw

diff --git a/gcc/testsuite/gcc.target/arm/simd/pr101325-2.c 
b/gcc/testsuite/gcc.target/arm/simd/pr101325-2.c
new file mode 100644
index 000..7907a386385
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/pr101325-2.c
@@ -0,0 +1,19 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_mve_hw } */
+/* { dg-options "-O3" } */
+/* { dg-add-options arm_v8_1m_mve } */
+
+#include <arm_mve.h>
+
+
+__attribute((noinline,noipa))
+unsigned foo(int8x16_t v, int8x16_t w)
+{
+  return vcmpeqq (v, w);
+}
+
+int main(void)
+{
+  if (foo (vdupq_n_s8(0), vdupq_n_s8(0)) != 0xffffU)
+__builtin_abort ();
+}
diff --git a/gcc/testsuite/gcc.target/arm/simd/pr101325.c 
b/gcc/testsuite/gcc.target/arm/simd/pr101325.c
new file mode 100644
index 000..a466683a0b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/pr101325.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O3" } */
+
+#include 
+
+unsigned foo(int8x16_t v, int8x16_t w)
+{
+  return vcmpeqq (v, w);
+}
+/* { dg-final { scan-assembler {\tvcmp.i8  eq} } } */
+/* { dg-final { scan-assembler {\tvmrs\t r[0-9]+, P0} } } */
+/* { dg-final { scan-assembler {\tuxth} } } */
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index e030e4f376b..b0e35b602af 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4889,6 +4889,7 @@ proc check_effective_target_arm_cmse_hw { } {
}
 } "-mcmse -Wl,--section-start,.gnu.sgstubs=0x0040"]
 }
+
 # Return 1 if the target supports executing MVE instructions, 0
 # otherwise.
 
@@ -4904,7 +4905,7 @@ proc check_effective_target_arm_mve_hw {} {
   : "0" (a), "r" (b));
  return (a != 2);
}
-} ""]
+} [add_options_for_arm_v8_1m_mve_fp ""]]

Re: [PATCH 04/13] arm: Add GENERAL_AND_VPR_REGS regclass

2021-10-11 Thread Christophe LYON via Gcc-patches



On 28/09/2021 15:32, Christophe LYON via Gcc-patches wrote:


On 28/09/2021 13:18, Kyrylo Tkachov wrote:

Hi Christophe,


-Original Message-
From: Gcc-patches  On Behalf Of Christophe
LYON via Gcc-patches
Sent: 08 September 2021 08:49
To: Richard Earnshaw ; gcc-
patc...@gcc.gnu.org
Subject: Re: [PATCH 04/13] arm: Add GENERAL_AND_VPR_REGS regclass


On 07/09/2021 15:35, Richard Earnshaw wrote:


On 07/09/2021 13:05, Christophe LYON wrote:

On 07/09/2021 11:42, Richard Earnshaw wrote:


On 07/09/2021 10:15, Christophe Lyon via Gcc-patches wrote:
At some point during the development of this patch series, it 
appeared

that in some cases the register allocator wants “VPR or general”
rather than “VPR or general or FP” (which is the same thing as
ALL_REGS).  The series does not seem to require this anymore, 
but it
seems to be a good thing to do anyway, to give the register 
allocator

more freedom.

2021-09-01  Christophe Lyon 

 gcc/
 * config/arm/arm.h (reg_class): Add GENERAL_AND_VPR_REGS.
 (REG_CLASS_NAMES): Likewise.
 (REG_CLASS_CONTENTS): Likewise. Add VPR_REG to ALL_REGS.

diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 015299c1534..fab39d05916 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -1286,6 +1286,7 @@ enum reg_class
 SFP_REG,
 AFP_REG,
 VPR_REG,
+  GENERAL_AND_VPR_REGS,
 ALL_REGS,
 LIM_REG_CLASSES
   };
@@ -1315,6 +1316,7 @@ enum reg_class
 "SFP_REG",    \
 "AFP_REG",    \
 "VPR_REG",    \
+  "GENERAL_AND_VPR_REGS", \
 "ALL_REGS"    \
   }
   @@ -1343,7 +1345,8 @@ enum reg_class
 { 0x, 0x, 0x, 0x0040 }, /* SFP_REG
*/    \
 { 0x, 0x, 0x, 0x0080 }, /* AFP_REG
*/    \
 { 0x, 0x, 0x, 0x0400 }, /* 
VPR_REG.

*/    \
-  { 0x7FFF, 0x, 0x, 0x000F } /* ALL_REGS.
*/    \
+  { 0x5FFF, 0x, 0x, 0x0400 }, /*
GENERAL_AND_VPR_REGS.  */ \
+  { 0x7FFF, 0x, 0x, 0x040F } /* ALL_REGS.
*/    \
   }

You've changed the definition of ALL_REGS here (to include VPR_REG),
but not really explained why.  Is that the source of the underlying
issue with the 'appeared' you mention?


I first added VPR_REG to ALL_REGS, but Richard Sandiford suggested I
create a new GENERAL_AND_VPR_REGS that would be more restrictive. I
did not remove VPR_REG from ALL_REGS because I thought it was an
omission: shouldn't ALL_REGS contain all registers?

Surely that should be a separate patch then.
OK, I can remove that line from this patch and make a separate 
one-liner

for ALL_REGS.
Did you end up sending that patch out? (Sorry, I may have missed it 
in my archive).
This patch to add GENERAL_AND_VPR_REGS is okay with the ALL_REGS 
change separated out.


No I didn't send it yet: I suspect there will be iterations on the 
next patches in the series, this small change alone wasn't worth 
sending a v2 :-)



Here is the patch now split into two parts.

Christophe



Thanks,

Christophe




Thanks,
Kyrill


Thanks,

Christophe



R.




R.



     #define FP_SYSREGS \
From c57fb3fc853d8bf04f589682f03e9d3baac2dbd5 Mon Sep 17 00:00:00 2001
From: Christophe Lyon 
Date: Thu, 26 Aug 2021 16:01:58 +
Subject: [PATCH v2 04/14] arm: Add GENERAL_AND_VPR_REGS regclass
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

At some point during the development of this patch series, it appeared
that in some cases the register allocator wants “VPR or general”
rather than “VPR or general or FP” (which is the same thing as
ALL_REGS).  The series does not seem to require this anymore, but it
seems to be a good thing to do anyway, to give the register allocator
more freedom.

2021-09-01  Christophe Lyon  

gcc/
* config/arm/arm.h (reg_class): Add GENERAL_AND_VPR_REGS.
(REG_CLASS_NAMES): Likewise.
(REG_CLASS_CONTENTS): Likewise.

diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 015299c1534..eae1b1cd0fb 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -1286,6 +1286,7 @@ enum reg_class
   SFP_REG,
   AFP_REG,
   VPR_REG,
+  GENERAL_AND_VPR_REGS,
   ALL_REGS,
   LIM_REG_CLASSES
 };
@@ -1315,6 +1316,7 @@ enum reg_class
   "SFP_REG",   \
   "AFP_REG",   \
   "VPR_REG",   \
+  "GENERAL_AND_VPR_REGS", \
   "ALL_REGS"   \
 }
 
@@ -1343,6 +1345,7 @@ enum reg_class
   { 0x, 0x, 0x, 0x0040 }, /* SFP_REG */\
   { 0x, 0x, 0x, 0x0080 }, /* AFP_REG */\
   { 0x, 0x, 0x, 0x0400 }, /* VPR_REG.  */  \
+  { 0x5FFF, 0x, 0x, 0x0400 }, /* GENERAL_AND_VPR_REGS. 
 */ \
   { 0x7FFF, 0x, 0x, 0x000F }  /* ALL_REGS.  */ \
 }
 
-- 
2.25.1

From ce9429d59d513b2998f73c6e256702ad447f2ae7 Mon Sep 17 00:00:00 2001
From: Christophe Lyon 
Date: Wed, 8 Sep 2021 08:

Re: [PATCH] opts: Remove AUTODETECT_VALUE usage.

2021-10-11 Thread Richard Biener via Gcc-patches

On Mon, Oct 11, 2021 at 12:58 PM Martin Liška  wrote:
>
> The patch is about using OPTION_SET_P instead of a default
> option value set in common.opt.
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>
> Ready to be installed?

OK, but...

> Thanks,
> Martin
>
> gcc/ChangeLog:
>
> * common.opt: Remove Init(2) for some options.

you could enumerate the three changed opts here.

> * toplev.c (process_options): Do not use AUTODETECT_VALUE, but
> use rather OPTION_SET_P.
> ---
>   gcc/common.opt | 6 +++---
>   gcc/toplev.c   | 6 +++---
>   2 files changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/gcc/common.opt b/gcc/common.opt
> index 59ecc9fbdf7..4099effcc80 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -2399,7 +2399,7 @@ Common Var(flag_live_range_shrinkage) Init(0) 
> Optimization
>   Relief of register pressure through live range shrinkage.
>
>   frename-registers
> -Common Var(flag_rename_registers) Init(2) Optimization
> +Common Var(flag_rename_registers) Optimization
>   Perform a register renaming optimization pass.
>
>   fschedule-fusion
> @@ -2797,7 +2797,7 @@ Common Ignore
>   Does nothing.  Preserved for backward compatibility.
>
>   ftree-cselim
> -Common Var(flag_tree_cselim) Init(2) Optimization
> +Common Var(flag_tree_cselim) Optimization
>   Transform condition stores into unconditional ones.
>
>   ftree-switch-conversion
> @@ -3158,7 +3158,7 @@ Common Var(flag_value_profile_transformations) 
> Optimization
>   Use expression value profiles in optimizations.
>
>   fweb
> -Common Var(flag_web) Init(2) Optimization
> +Common Var(flag_web) Optimization
>   Construct webs and split unrelated uses of single variable.
>
>   ftree-builtin-call-dce
> diff --git a/gcc/toplev.c b/gcc/toplev.c
> index b878234f3f2..167feac2583 100644
> --- a/gcc/toplev.c
> +++ b/gcc/toplev.c
> @@ -1332,10 +1332,10 @@ process_options (bool no_backend)
>   }
>
> /* web and rename-registers help when run after loop unrolling.  */
> -  if (flag_web == AUTODETECT_VALUE)
> +  if (!OPTION_SET_P (flag_web))
>   flag_web = flag_unroll_loops;
>
> -  if (flag_rename_registers == AUTODETECT_VALUE)
> +  if (!OPTION_SET_P (flag_rename_registers))
>   flag_rename_registers = flag_unroll_loops;
>
> if (flag_non_call_exceptions)
> @@ -1598,7 +1598,7 @@ process_options (bool no_backend)
> debug_inline_points = 0;
>   }
>
> -  if (flag_tree_cselim == AUTODETECT_VALUE)
> +  if (!OPTION_SET_P (flag_tree_cselim))
>   {
> if (HAVE_conditional_move)
> flag_tree_cselim = 1;
> --
> 2.33.0
>

Re: [Patch 2/7, Arm, GCC] Add option -mbranch-protection.

2021-10-11 Thread Richard Earnshaw via Gcc-patches


On 08/10/2021 13:17, Tejas Belagod via Gcc-patches wrote:

Hi,

Add -mbranch-protection option and its associated parsing routines.
This option enables the code-generation of pointer signing and
authentication instructions in function prologues and epilogues.

Tested on arm-none-eabi. OK for trunk?

2021-10-04  Tejas Belagod  

gcc/ChangeLog:

* common/config/arm/arm-common.c
 (arm_print_hit_for_pacbti_option): New.
 (arm_progress_next_token): New.
 (arm_parse_pac_ret_clause): New routine for parsing the
pac-ret clause for -mbranch-protection.
(arm_parse_pacbti_option): New routine to parse all the options
to -mbranch-protection.
* config/arm/arm-protos.h (arm_parse_pacbti_option): Export.
* config/arm/arm.c (arm_configure_build_target): Handle option
to -mbranch-protection.
* config/arm/arm.opt (mbranch-protection): New.
(arm_enable_pacbti): New.



You're missing documentation for invoke.texi.

Also, how does this differ from the existing option in aarch64?  Can the 
code from that be adapted to be made common to both targets rather than 
doing a new implementation?


Finally, there are far too many manifest constants in this patch, they 
need replacing with enums or #defines as appropriate if we cannot share 
the aarch64 code.


R.

Re: [PATCH] Remove usage of IRA_REGION_AUTODETECT

2021-10-11 Thread Richard Biener via Gcc-patches

On Mon, Oct 11, 2021 at 12:58 PM Martin Liška  wrote:
>
> Similar patch, let's rely on OPTION_SET_P and not a default
> options value.
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>
> Ready to be installed?

OK.

> Thanks,
> Martin
>
> gcc/ChangeLog:
>
> * common.opt: Remove usage of IRA_REGION_AUTODETECT.
> * flag-types.h (enum ira_region): Likewise.
> * toplev.c (process_options): Use OPTION_SET_P instead of
> IRA_REGION_AUTODETECT.
> ---
>   gcc/common.opt   | 2 +-
>   gcc/flag-types.h | 4 
>   gcc/toplev.c | 2 +-
>   3 files changed, 2 insertions(+), 6 deletions(-)
>
> diff --git a/gcc/common.opt b/gcc/common.opt
> index 52693e226d2..59ecc9fbdf7 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -1923,7 +1923,7 @@ EnumValue
>   Enum(ira_algorithm) String(priority) Value(IRA_ALGORITHM_PRIORITY)
>
>   fira-region=
> -Common Joined RejectNegative Enum(ira_region) Var(flag_ira_region) 
> Init(IRA_REGION_AUTODETECT) Optimization
> +Common Joined RejectNegative Enum(ira_region) Var(flag_ira_region) 
> Init(IRA_REGION_ONE) Optimization
>   -fira-region=[one|all|mixed]  Set regions for IRA.
>
>   Enum
> diff --git a/gcc/flag-types.h b/gcc/flag-types.h
> index 5bd1f771c8b..ae0b216e8a3 100644
> --- a/gcc/flag-types.h
> +++ b/gcc/flag-types.h
> @@ -191,10 +191,6 @@ enum ira_region
> IRA_REGION_ONE,
> IRA_REGION_ALL,
> IRA_REGION_MIXED,
> -  /* This value means that there were no options -fira-region on the
> - command line and that we should choose a value depending on the
> - used -O option.  */
> -  IRA_REGION_AUTODETECT
>   };
>
>   /* The options for excess precision.  */
> diff --git a/gcc/toplev.c b/gcc/toplev.c
> index 81748b1152a..b878234f3f2 100644
> --- a/gcc/toplev.c
> +++ b/gcc/toplev.c
> @@ -1319,7 +1319,7 @@ process_options (bool no_backend)
>   }
>
> /* One region RA really helps to decrease the code size.  */
> -  if (flag_ira_region == IRA_REGION_AUTODETECT)
> +  if (!OPTION_SET_P (flag_ira_region))
>   flag_ira_region
> = optimize_size || !optimize ? IRA_REGION_ONE : IRA_REGION_MIXED;
>
> --
> 2.33.0
>

Re: [PATCH] options: Fix variable tracking option processing.

2021-10-11 Thread Richard Biener via Gcc-patches

On Mon, Oct 11, 2021 at 1:02 PM Martin Liška  wrote:
>
> After the recent change in Optimize attribute handling, we need the
> finish_options function to properly auto-detect variable tracking options.
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>
> Ready to be installed?
> Thanks,
> Martin
>
> PR debug/102585
>
> gcc/ChangeLog:
>
> * common.opt: Do not init flag_var_tracking* options.
> * opts.c (finish_options): Handle flag_var_tracking* options.
> * toplev.c (process_options): Move to opts.c.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/pr102585.c: New test.
> ---
>   gcc/common.opt  | 14 +-
>   gcc/opts.c  | 28 
>   gcc/testsuite/gcc.dg/pr102585.c |  6 ++
>   gcc/toplev.c| 33 +++--
>   4 files changed, 42 insertions(+), 39 deletions(-)
>   create mode 100644 gcc/testsuite/gcc.dg/pr102585.c
>
> diff --git a/gcc/common.opt b/gcc/common.opt
> index 52693e226d2..ec020f4e642 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -3003,19 +3003,16 @@ Common Undocumented Var(flag_use_linker_plugin)
>
>   ; Positive if we should track variables, negative if we should run
>   ; the var-tracking pass only to discard debug annotations, zero if
> -; we're not to run it.  When flag_var_tracking == 2 (AUTODETECT_VALUE) it
> -; will be set according to optimize, debug_info_level and debug_hooks
> -; in process_options ().
> +; we're not to run it.
>   fvar-tracking
> -Common Var(flag_var_tracking) Init(2) PerFunction
> +Common Var(flag_var_tracking) PerFunction
>   Perform variable tracking.
>
>   ; Positive if we should track variables at assignments, negative if
>   ; we should run the var-tracking pass only to discard debug
> -; annotations.  When flag_var_tracking_assignments ==
> -; AUTODETECT_VALUE it will be set according to flag_var_tracking.
> +; annotations.
>   fvar-tracking-assignments
> -Common Var(flag_var_tracking_assignments) Init(2) PerFunction
> +Common Var(flag_var_tracking_assignments) PerFunction
>   Perform variable tracking by annotating assignments.
>
>   ; Nonzero if we should toggle flag_var_tracking_assignments after
> @@ -3026,8 +3023,7 @@ Toggle -fvar-tracking-assignments.
>
>   ; Positive if we should track uninitialized variables, negative if
>   ; we should run the var-tracking pass only to discard debug
> -; annotations.  When flag_var_tracking_uninit == AUTODETECT_VALUE it
> -; will be set according to flag_var_tracking.
> +; annotations.
>   fvar-tracking-uninit
>   Common Var(flag_var_tracking_uninit) PerFunction
>   Perform variable tracking and also tag variables that are uninitialized.
> diff --git a/gcc/opts.c b/gcc/opts.c
> index 2116c2991dd..eeb6b1dcc7c 100644
> --- a/gcc/opts.c
> +++ b/gcc/opts.c
> @@ -1353,6 +1353,34 @@ finish_options (struct gcc_options *opts, struct 
> gcc_options *opts_set,
>   SET_OPTION_IF_UNSET (opts, opts_set, flag_vect_cost_model,
>  VECT_COST_MODEL_CHEAP);
>
> +  /* If the user specifically requested variable tracking with tagging
> + uninitialized variables, we need to turn on variable tracking.
> + (We already determined above that variable tracking is feasible.)  */
> +  if (opts->x_flag_var_tracking_uninit == 1)
> +opts->x_flag_var_tracking = 1;
> +
> +  if (!opts_set->x_flag_var_tracking)
> +opts->x_flag_var_tracking = optimize >= 1;

That's still not equivalent to the old code for -fvar-tracking-uninit which
sets opts->x_flag_var_tracking to 1 and the old code checked that
for AUTODETECT_VALUE but you override it here for -O0.

> +  if (!opts_set->x_flag_var_tracking_uninit)
> +opts->x_flag_var_tracking_uninit = opts->x_flag_var_tracking;
> +
> +  if (!opts_set->x_flag_var_tracking_assignments)
> +opts->x_flag_var_tracking_assignments
> +  = (opts->x_flag_var_tracking
> +&& !(opts->x_flag_selective_scheduling
> + || opts->x_flag_selective_scheduling2));
> +
> +  if (opts->x_flag_var_tracking_assignments_toggle)
> +opts->x_flag_var_tracking_assignments = 
> !opts->x_flag_var_tracking_assignments;
> +
> +  if (opts->x_flag_var_tracking_assignments && !opts->x_flag_var_tracking)
> +opts->x_flag_var_tracking = opts->x_flag_var_tracking_assignments = -1;
> +
> +  if (opts->x_flag_var_tracking_assignments
> +  && (opts->x_flag_selective_scheduling || 
> opts->x_flag_selective_scheduling2))
> +warning_at (loc, 0,
> +   "var-tracking-assignments changes selective scheduling");
>   }
>
>   #define LEFT_COLUMN   27
> diff --git a/gcc/testsuite/gcc.dg/pr102585.c b/gcc/testsuite/gcc.dg/pr102585.c
> new file mode 100644
> index 000..efd066b4a4e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr102585.c
> @@ -0,0 +1,6 @@
> +/* PR debug/102585 */
> +/* { dg-do compile } */
> +/* { dg-options "-fvar-tracking-assignments -fno-var-tracking" } */
> +
> +#pragma GCC

Re: [PATCH] tree-optimization: [PR102622]: wrong code due to signed one bit integer and "a?-1:0"

2021-10-11 Thread Michael Matz via Gcc-patches

Hello,

On Sat, 9 Oct 2021, apinski--- via Gcc-patches wrote:

> +  (lshift (convert (convert:boolean_type_node @0)) { shift; })))
> +/* a ? -1 : 0 -> -a.  No need to check the TYPE_PRECISION not being 1
> +   here as the powerof2cst case above will handle that case correctly.  
> */

Well, but the QoI will improve quite a bit when you just do the check, 
instead of relying on order of patterns.  It's not slow or harmful to 
check and will make the order irrelevant, which, given the number of 
patterns we already have, is a good thing.  (It will also be smaller to 
check than to document why the check isn't needed :-) )

Ciao,
Michael.

Re: [PATCH 05/13] arm: Add support for VPR_REG in arm_class_likely_spilled_p

2021-10-11 Thread Richard Sandiford via Gcc-patches

Sorry for the very long delay in reviewing this.  Things have been
a bit hectic recently.

Christophe Lyon via Gcc-patches  writes:
> VPR_REG is the only register in its class, so it should be handled by
> TARGET_CLASS_LIKELY_SPILLED_P.  No test fails without this patch, but
> it seems it should be implemented.

I think instead we should change the “return false” so that it
calls the default implementation (default_class_likely_spilled_p).
That should handle this case correctly, as well as any future
single-register classes that we might add.

Thanks,
Richard

>
> 2021-09-01  Christophe Lyon  
>
>   gcc/
>   * config/arm/arm.c (arm_class_likely_spilled_p): Handle VPR_REG.
>
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index 11dafc70067..1222cb0d0fe 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -29307,6 +29307,9 @@ arm_class_likely_spilled_p (reg_class_t rclass)
>|| rclass  == CC_REG)
>  return true;
>  
> +  if (TARGET_HAVE_MVE && (rclass == VPR_REG))
> +return true;
> +
>return false;
>  }

Re: [PATCH] options: Fix variable tracking option processing.

2021-10-11 Thread Martin Liška


On 10/11/21 15:05, Richard Biener wrote:

+  if (!opts_set->x_flag_var_tracking)
+opts->x_flag_var_tracking = optimize >= 1;

That's still not equivalent to the old code for -fvar-tracking-uninit which
sets opts->x_flag_var_tracking to 1 and the old code checked that
for AUTODETECT_VALUE but you override it here for -O0.



Do you mean the newly added code:

+  if (!opts_set->x_flag_var_tracking)

+opts->x_flag_var_tracking = optimize >= 1;


that should be equivalent to:

-  if (flag_var_tracking == AUTODETECT_VALUE)

-flag_var_tracking = optimize >= 1;


? Or do I miss something?

Thanks,
Martin

Re: [Patch 3/7, Arm, GCC] Add testsuite library support for PACBTI target.

2021-10-11 Thread Richard Earnshaw via Gcc-patches


On 08/10/2021 13:17, Tejas Belagod via Gcc-patches wrote:

Hi,

Add targeting-checking entities for PACBTI in testsuite
framework.

Tested on arm-none-eabi. OK for trunk?

2021-10-04  Tejas Belagod  

gcc/ChangeLog:

* testsuite/lib/target-supports.exp
(check_effective_target_arm_pacbti_hw): New.



OK.

R.

Re: [Patch 3/7, Arm, GCC] Add testsuite library support for PACBTI target.

2021-10-11 Thread Richard Earnshaw via Gcc-patches


On 11/10/2021 14:36, Richard Earnshaw via Gcc-patches wrote:

On 08/10/2021 13:17, Tejas Belagod via Gcc-patches wrote:

Hi,

Add targeting-checking entities for PACBTI in testsuite
framework.

Tested on arm-none-eabi. OK for trunk?

2021-10-04  Tejas Belagod  

gcc/ChangeLog:

* testsuite/lib/target-supports.exp
(check_effective_target_arm_pacbti_hw): New.



OK.

R.


Oh, wait!  Not OK.  Needs documentation in sourcebuild.texi.

R.

[Ada] Size of time_t in newer versions of VxWorks7

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

Set the default size of time_t_bits to match the standard runtimes. The
size must match that which is used in the VSB, since the same VSB is
used to build all of the runtimes.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* libgnat/s-parame__ae653.ads (time_t_bits): Change to
Long_Long_Integer'Size.  Add some comments to explain.

diff --git a/gcc/ada/libgnat/s-parame__ae653.ads b/gcc/ada/libgnat/s-parame__ae653.ads
--- a/gcc/ada/libgnat/s-parame__ae653.ads
+++ b/gcc/ada/libgnat/s-parame__ae653.ads
@@ -104,8 +104,12 @@ package System.Parameters is
-- Characteristics of time_t type --

 
-   time_t_bits : constant := Long_Integer'Size;
-   --  Number of bits in type time_t
+   --  IMPORTANT NOTE:
+   --  time_t_bits must match the size specified in the VSB.
+
+   time_t_bits : constant := Long_Long_Integer'Size;
+   --  Number of bits in type time_t for SR0660 and newer,
+   --  with the default configuration of the VSB.
 
--
-- Characteristics of types in Interfaces.C --

[Ada] Simplify code for checks within an initialization procedure

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

It is simpler to access the type of the first formal using a semantic
instead of a syntactic query. Behaviour is unaffected.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_util.adb (Compile_Time_Constraint_Error): Simplify getting
the type of the first formal parameter.

diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb
--- a/gcc/ada/sem_util.adb
+++ b/gcc/ada/sem_util.adb
@@ -6590,9 +6590,7 @@ package body Sem_Util is
if Inside_Init_Proc then
   declare
  Init_Proc_Type : constant Entity_Id :=
-   Entity (Parameter_Type (First
- (Parameter_Specifications
-(Parent (Current_Scope_No_Loops);
+   Etype (First_Formal (Current_Scope_No_Loops));
 
  Conc_Typ : constant Entity_Id :=
(if Present (Init_Proc_Type)

[Ada] Fix crash on array component with Default_Value

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

When complaining about a compile-time constraint error within a default
initialization procedure we assumed that this procedure initializes a
record object. However, it can initialize an array object too.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_util.adb (Inside_Init_Proc): Simplify.
* sem_aggr.adb (Resolve_Record_Aggregate): Fix style.
* sem_util.adb (Compile_Time_Constraint_Error): Guard against
calling Corresponding_Concurrent_Type with an array type entity.

diff --git a/gcc/ada/exp_util.adb b/gcc/ada/exp_util.adb
--- a/gcc/ada/exp_util.adb
+++ b/gcc/ada/exp_util.adb
@@ -7994,10 +7994,8 @@ package body Exp_Util is
--
 
function Inside_Init_Proc return Boolean is
-  Proc : constant Entity_Id := Enclosing_Init_Proc;
-
begin
-  return Proc /= Empty;
+  return Present (Enclosing_Init_Proc);
end Inside_Init_Proc;
 
--


diff --git a/gcc/ada/sem_aggr.adb b/gcc/ada/sem_aggr.adb
--- a/gcc/ada/sem_aggr.adb
+++ b/gcc/ada/sem_aggr.adb
@@ -5307,8 +5307,8 @@ package body Sem_Aggr is
 
   Add_Association
(Component  => Component,
-Expr   => Empty,
-Assoc_List => New_Assoc_List,
+Expr   => Empty,
+Assoc_List => New_Assoc_List,
 Is_Box_Present => True);
 
elsif Present (Parent (Component))


diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb
--- a/gcc/ada/sem_util.adb
+++ b/gcc/ada/sem_util.adb
@@ -6589,11 +6589,16 @@ package body Sem_Util is
 
if Inside_Init_Proc then
   declare
+ Init_Proc_Type : constant Entity_Id :=
+   Entity (Parameter_Type (First
+ (Parameter_Specifications
+(Parent (Current_Scope_No_Loops);
+
  Conc_Typ : constant Entity_Id :=
-  Corresponding_Concurrent_Type
-(Entity (Parameter_Type (First
-  (Parameter_Specifications
-(Parent (Current_Scope));
+   (if Present (Init_Proc_Type)
+  and then Init_Proc_Type in E_Record_Type_Id
+then Corresponding_Concurrent_Type (Init_Proc_Type)
+else Empty);
 
   begin
  --  Don't complain if the corresponding concurrent type

[Ada] Do not clear Analyzed flag in expand if already set by preanalysis

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

During Expand, prevent the clearing of the Analyzed flag if it has
already been set by Fold_Ureal.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* expander.adb (Expand): Skip clearing of Analyzed flag if
already set for N_Real_Literal.

diff --git a/gcc/ada/expander.adb b/gcc/ada/expander.adb
--- a/gcc/ada/expander.adb
+++ b/gcc/ada/expander.adb
@@ -49,6 +49,7 @@ with Sem_Ch8;use Sem_Ch8;
 with Sem_Util;   use Sem_Util;
 with Sinfo;  use Sinfo;
 with Sinfo.Nodes;use Sinfo.Nodes;
+with Stand;  use Stand;
 with Table;
 
 package body Expander is
@@ -152,7 +153,19 @@ package body Expander is
   --  not take place. This prevents cascaded errors due to stack mismatch.
 
   elsif not Expander_Active then
- Set_Analyzed (N, Full_Analysis);
+
+ --  Do not clear the Analyzed flag if it has been set on purpose
+ --  during preanalysis in Fold_Ureal. In that case, the Etype field
+ --  in N_Real_Literal will be set to something different than
+ --  Universal_Real.
+
+ if Full_Analysis
+   or else not (Nkind (N) = N_Real_Literal
+  and then Present (Etype (N))
+  and then Etype (N) /= Universal_Real)
+ then
+Set_Analyzed (N, Full_Analysis);
+ end if;
 
  if Serious_Errors_Detected > 0 and then Scope_Is_Transient then
 Scope_Stack.Table

[Ada] RTEMS: use default stack checking emulation package

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

Remove the RTEMS specific version of System.Stack_Checking.Operations as
the internal RTEMS API it uses can only detect stack overflow after the
event, whereas the stack checking emulation is meant to detect the stack
overflow before it occurs. Use the standard
System.Stack_Checking.Operations package instead and include its object
file in the runtime library.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* Makefile.rtl (RTEMS): Add s-stchop.o to
EXTRA_GNATRTL_NONTASKING_OBJS, remove s-stchop__rtems.adb.
* libgnat/s-stchop__rtems.adb: Removed.

diff --git a/gcc/ada/Makefile.rtl b/gcc/ada/Makefile.rtl
--- a/gcc/ada/Makefile.rtl
+++ b/gcc/ada/Makefile.rtl
@@ -2057,9 +2057,10 @@ ifeq ($(strip $(filter-out rtems%,$(target_os))),)
   s-taprop.adbhttp://www.gnu.org/licenses/>.  --
---  --
--- GNARL was developed by the GNARL team at Florida State University.   --
--- Extensive contributions were provided by Ada Core Technologies, Inc. --
---  --
---
-
---  This is the RTEMS version of this package.
---  This file should be kept synchronized with the general implementation
---  provided by s-stchop.adb.
-
-pragma Restrictions (No_Elaboration_Code);
---  We want to guarantee the absence of elaboration code because the
---  binder does not handle references to this package.
-
-with Ada.Exceptions;
-
-with Interfaces.C; use Interfaces.C;
-
-package body System.Stack_Checking.Operations is
-
-   
-   -- Invalidate_Stack_Cache --
-   
-
-   procedure Invalidate_Stack_Cache (Any_Stack : Stack_Access) is
-  pragma Warnings (Off, Any_Stack);
-   begin
-  Cache := Null_Stack;
-   end Invalidate_Stack_Cache;
-
-   -
-   -- Notify_Stack_Attributes --
-   -
-
-   procedure Notify_Stack_Attributes
- (Initial_SP : System.Address;
-  Size   : System.Storage_Elements.Storage_Offset)
-   is
-
-  --  RTEMS keeps all the information we need.
-
-  pragma Unreferenced (Size);
-  pragma Unreferenced (Initial_SP);
-
-   begin
-  null;
-   end Notify_Stack_Attributes;
-
-   -
-   -- Stack_Check --
-   -
-
-   function Stack_Check
- (Stack_Address : System.Address) return Stack_Access
-   is
-  pragma Unreferenced (Stack_Address);
-
-  --  RTEMS has a routine to check if the stack is blown.
-  --  It returns a C99 bool.
-  function rtems_stack_checker_is_blown return Interfaces.C.unsigned_char;
-  pragma Import (C,
- rtems_stack_checker_is_blown, "rtems_stack_checker_is_blown");
-
-   begin
-  --  RTEMS has a routine to check this.  So use it.
-
-  if rtems_stack_checker_is_blown /= 0 then
- Ada.Exceptions.Raise_Exception
-   (E   => Storage_Error'Identity,
-Message => "stack overflow detected");
-  end if;
-
-  return null;
-
-   end Stack_Check;
-
-   
-   -- Update_Stack_Cache --
-   
-
-   procedure Update_Stack_Cache (Stack : Stack_Access) is
-   begin
-  if not Multi_Processor then
- Cache := Stack;
-  end if;
-   end Update_Stack_Cache;
-
-end System.Stack_Checking.Operations;

[Ada] Simplify initialization of concurrent components

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

Concurrent record types are either task record types or protected record
types. Now we detect them collectively (when looking for any of them) or
exclusively (when looking for one or the other).

Cleanup code related to fixes in expansion of boxes in record
aggregates. Behaviour is unaffected.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_ch3.adb (Build_Init_Statements): Simplify detection of
concurrent record types.

diff --git a/gcc/ada/exp_ch3.adb b/gcc/ada/exp_ch3.adb
--- a/gcc/ada/exp_ch3.adb
+++ b/gcc/ada/exp_ch3.adb
@@ -3206,9 +3206,7 @@ package body Exp_Ch3 is
  --  types moving any expanded code from the spec to the body of the
  --  init procedure.
 
- if Is_Task_Record_Type (Rec_Type)
-   or else Is_Protected_Record_Type (Rec_Type)
- then
+ if Is_Concurrent_Record_Type (Rec_Type) then
 declare
Decl : constant Node_Id :=
 Parent (Corresponding_Concurrent_Type (Rec_Type));
@@ -3589,12 +3587,11 @@ package body Exp_Ch3 is
   end loop;
end if;
 end;
- end if;
 
  --  For a protected type, add statements generated by
  --  Make_Initialize_Protection.
 
- if Is_Protected_Record_Type (Rec_Type) then
+ elsif Is_Protected_Record_Type (Rec_Type) then
 Append_List_To (Stmts,
   Make_Initialize_Protection (Rec_Type));
  end if;

[Ada] Remove redundant guard against an empty component list

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

There is no need to explicitly guard against an empty list where the
subsequent iteration with First/Next/Present works fine.

Cleanup related to expansion of aggregates in GNATprove mode; behaviour
is unaffected.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_aggr.adb (Component_OK_For_Backend): Remove redundant
guard.

diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb
--- a/gcc/ada/exp_aggr.adb
+++ b/gcc/ada/exp_aggr.adb
@@ -8547,10 +8547,6 @@ package body Exp_Aggr is
  Expr_Q : Node_Id;
 
   begin
- if No (Comps) then
-return True;
- end if;
-
  C := First (Comps);
  while Present (C) loop

[Ada] Move rewriting of boxes in aggregates from resolution to expansion

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

Rewriting of boxes in record aggregates into the corresponding default
values was done in resolution, where we special-cased access types and
scalar types with a Default_Value aspect.

However, this rewriting rather belongs to expansion. Also, the
special-casing didn't take Normalize_Scalars nor Initialize_Scalars
pragmas into account and it didn't work for private types.

Now the resolution keeps boxes that require simple initialization, while
expansion reuses existing routines for initialization of record types.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_aggr.adb (Initialize_Record_Component): Add assertion
about one of the parameters, so that illegal attempts to
initialize record components with Empty node are detected early
on.
(Build_Record_Aggr_Code): Handle boxes in aggregate component
associations just like the components with no initialization in
Build_Record_Init_Proc.
* sem_aggr.adb (Resolve_Record_Aggregate): For components that
require simple initialization carry boxes from resolution to
expansion.
* sem_util.adb (Needs_Simple_Initialization): Remove redundant
paren.

diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb
--- a/gcc/ada/exp_aggr.adb
+++ b/gcc/ada/exp_aggr.adb
@@ -3209,6 +3209,8 @@ package body Exp_Aggr is
  Init_Stmt : Node_Id;
 
   begin
+ pragma Assert (Nkind (Init_Expr) in N_Subexpr);
+
  --  Protect the initialization statements from aborts. Generate:
 
  --Abort_Defer;
@@ -3793,6 +3795,26 @@ package body Exp_Aggr is
 With_Default_Init => True,
 Constructor_Ref   => Expression (Comp)));
 
+ elsif Box_Present (Comp)
+   and then Needs_Simple_Initialization (Etype (Selector))
+ then
+Comp_Expr :=
+  Make_Selected_Component (Loc,
+Prefix=> New_Copy_Tree (Target),
+Selector_Name => New_Occurrence_Of (Selector, Loc));
+
+Initialize_Record_Component
+  (Rec_Comp  => Comp_Expr,
+   Comp_Typ  => Etype (Selector),
+   Init_Expr => Get_Simple_Init_Val
+  (Typ  => Etype (Selector),
+   N=> Comp,
+   Size =>
+ (if Known_Esize (Selector)
+  then Esize (Selector)
+  else Uint_0)),
+   Stmts => L);
+
  --  Ada 2005 (AI-287): For each default-initialized component generate
  --  a call to the corresponding IP subprogram if available.
 


diff --git a/gcc/ada/sem_aggr.adb b/gcc/ada/sem_aggr.adb
--- a/gcc/ada/sem_aggr.adb
+++ b/gcc/ada/sem_aggr.adb
@@ -5387,74 +5387,12 @@ package body Sem_Aggr is
  Assoc_List => New_Assoc_List);
   Set_Has_Self_Reference (N);
 
-   --  A box-defaulted access component gets the value null. Also
-   --  included are components of private types whose underlying
-   --  type is an access type. In either case set the type of the
-   --  literal, for subsequent use in semantic checks.
-
-   elsif Present (Underlying_Type (Ctyp))
- and then Is_Access_Type (Underlying_Type (Ctyp))
-   then
-  --  If the component's type is private with an access type as
-  --  its underlying type then we have to create an unchecked
-  --  conversion to satisfy type checking.
-
-  if Is_Private_Type (Ctyp) then
- declare
-Qual_Null : constant Node_Id :=
-  Make_Qualified_Expression (Sloc (N),
-Subtype_Mark =>
-  New_Occurrence_Of
-(Underlying_Type (Ctyp), Sloc (N)),
-Expression   => Make_Null (Sloc (N)));
-
-Convert_Null : constant Node_Id :=
- Unchecked_Convert_To
-   (Ctyp, Qual_Null);
-
- begin
-Analyze_And_Resolve (Convert_Null, Ctyp);
-Add_Association
-  (Component  => Component,
-   Expr   => Convert_Null,
-   Assoc_List => New_Assoc_List);
- end;
-
-  --  Otherwise the component type is non-private
-
-  else
- Expr := Make_Null (Sloc (N));
- Set_Etype (Expr, Ctyp);
-
- Add_Association
-   (Component  => Component,
-Expr   =>

[Ada] Simplify detection of record components with default initialization

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

When detecting record components with default initialization we did two
iterations over the component list; now we do only one.

Also, there was no need to explicitly guard against an empty list where
the subsequent iteration with First/Next/Present works fine.

Cleanup related to expansion of aggregates in GNATprove mode; behaviour
is unaffected.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_aggr.adb (Has_Default_Init_Comps): Simplify.

diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb
--- a/gcc/ada/exp_aggr.adb
+++ b/gcc/ada/exp_aggr.adb
@@ -8897,46 +8897,41 @@ package body Exp_Aggr is

 
function Has_Default_Init_Comps (N : Node_Id) return Boolean is
-  Comps : constant List_Id := Component_Associations (N);
-  C : Node_Id;
+  Assoc : Node_Id;
   Expr  : Node_Id;
+  --  Component association and expression, respectively
 
begin
   pragma Assert (Nkind (N) in N_Aggregate | N_Extension_Aggregate);
 
-  if No (Comps) then
- return False;
-  end if;
-
   if Has_Self_Reference (N) then
  return True;
   end if;
 
-  --  Check if any direct component has default initialized components
+  Assoc := First (Component_Associations (N));
+  while Present (Assoc) loop
+ --  Each component association has either a box or an expression
 
-  C := First (Comps);
-  while Present (C) loop
- if Box_Present (C) then
-return True;
- end if;
+ pragma Assert (Box_Present (Assoc) xor Present (Expression (Assoc)));
 
- Next (C);
-  end loop;
+ --  Check if any direct component has default initialized components
 
-  --  Recursive call in case of aggregate expression
+ if Box_Present (Assoc) then
+return True;
 
-  C := First (Comps);
-  while Present (C) loop
- Expr := Expression (C);
+ --  Recursive call in case of aggregate expression
 
- if Present (Expr)
-   and then Nkind (Expr) in N_Aggregate | N_Extension_Aggregate
-   and then Has_Default_Init_Comps (Expr)
- then
-return True;
+ else
+Expr := Expression (Assoc);
+
+if Nkind (Expr) in N_Aggregate | N_Extension_Aggregate
+  and then Has_Default_Init_Comps (Expr)
+then
+   return True;
+end if;
  end if;
 
- Next (C);
+ Next (Assoc);
   end loop;
 
   return False;

[Ada] Simplify detection of delayed aggregates

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

Replace IF with a single RETURN statement. Cleanup related to expansion
of aggregates in GNATprove mode; semantics is unaffected.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_aggr.adb (Is_Delayed_Aggregate): Simplify.

diff --git a/gcc/ada/exp_aggr.adb b/gcc/ada/exp_aggr.adb
--- a/gcc/ada/exp_aggr.adb
+++ b/gcc/ada/exp_aggr.adb
@@ -8978,11 +8978,8 @@ package body Exp_Aggr is
  Kind := Nkind (Node);
   end if;
 
-  if Kind not in N_Aggregate | N_Extension_Aggregate then
- return False;
-  else
- return Expansion_Delayed (Node);
-  end if;
+  return Kind in N_Aggregate | N_Extension_Aggregate
+and then Expansion_Delayed (Node);
end Is_Delayed_Aggregate;

[Ada] Rewrite extended names in derived class-wide expressions

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

When building a derived class-wide pre- or postcondition we are mapping
references to inherited formals and subprograms.

Originally we only did it for simple names; recently we fixed this
mapping to also work for operator symbols; with this patch we also do
this for extended names.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_util.adb (Build_Class_Wide_Expression): Replace entities
of both simple and extended names.

diff --git a/gcc/ada/exp_util.adb b/gcc/ada/exp_util.adb
--- a/gcc/ada/exp_util.adb
+++ b/gcc/ada/exp_util.adb
@@ -1293,7 +1293,7 @@ package body Exp_Util is
 Adjust_Inherited_Pragma_Sloc (N);
  end if;
 
- if Nkind (N) in N_Identifier | N_Operator_Symbol
+ if Nkind (N) in N_Identifier | N_Expanded_Name | N_Operator_Symbol
and then Present (Entity (N))
and then
  (Is_Formal (Entity (N)) or else Is_Subprogram (Entity (N)))

[Ada] Reorder subprogram spec and bodies in alphabetical order

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

Required by the style guide and by future changes in this function.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch4.adb (Analyze_Membership_Op): Reorder subprogram spec
and bodies in alphabetical order.

diff --git a/gcc/ada/sem_ch4.adb b/gcc/ada/sem_ch4.adb
--- a/gcc/ada/sem_ch4.adb
+++ b/gcc/ada/sem_ch4.adb
@@ -2956,47 +2956,16 @@ package body Sem_Ch4 is
   I_F   : Interp_Index;
   T_F   : Entity_Id;
 
+  procedure Analyze_Set_Membership;
+  --  If a set of alternatives is present, analyze each and find the
+  --  common type to which they must all resolve.
+
   procedure Try_One_Interp (T1 : Entity_Id);
   --  Routine to try one proposed interpretation. Note that the context
   --  of the operation plays no role in resolving the arguments, so that
   --  if there is more than one interpretation of the operands that is
   --  compatible with a membership test, the operation is ambiguous.
 
-  
-  -- Try_One_Interp --
-  
-
-  procedure Try_One_Interp (T1 : Entity_Id) is
-  begin
- if Has_Compatible_Type (R, T1) then
-if Found
-  and then Base_Type (T1) /= Base_Type (T_F)
-then
-   It := Disambiguate (L, I_F, Index, Any_Type);
-
-   if It = No_Interp then
-  Ambiguous_Operands (N);
-  Set_Etype (L, Any_Type);
-  return;
-
-   else
-  T_F := It.Typ;
-   end if;
-
-else
-   Found := True;
-   T_F   := T1;
-   I_F   := Index;
-end if;
-
-Set_Etype (L, T_F);
- end if;
-  end Try_One_Interp;
-
-  procedure Analyze_Set_Membership;
-  --  If a set of alternatives is present, analyze each and find the
-  --  common type to which they must all resolve.
-
   
   -- Analyze_Set_Membership --
   
@@ -3095,6 +3064,37 @@ package body Sem_Ch4 is
  end if;
   end Analyze_Set_Membership;
 
+  
+  -- Try_One_Interp --
+  
+
+  procedure Try_One_Interp (T1 : Entity_Id) is
+  begin
+ if Has_Compatible_Type (R, T1) then
+if Found
+  and then Base_Type (T1) /= Base_Type (T_F)
+then
+   It := Disambiguate (L, I_F, Index, Any_Type);
+
+   if It = No_Interp then
+  Ambiguous_Operands (N);
+  Set_Etype (L, Any_Type);
+  return;
+
+   else
+  T_F := It.Typ;
+   end if;
+
+else
+   Found := True;
+   T_F   := T1;
+   I_F   := Index;
+end if;
+
+Set_Etype (L, T_F);
+ end if;
+  end Try_One_Interp;
+
   Op : Node_Id;
 
--  Start of processing for Analyze_Membership_Op

[Ada] Import binder globals as constant

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

The various __gl_XYZ binder globals prevent some link-time optimizations
when imported as mutable. Work around this by turning them into
constants.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* libgnarl/s-intman__android.adb, libgnarl/s-intman__lynxos.adb,
libgnarl/s-intman__posix.adb, libgnarl/s-intman__qnx.adb,
libgnarl/s-intman__solaris.adb, libgnarl/s-intman__susv3.adb,
libgnarl/s-taprob.adb, libgnarl/s-taprop__hpux-dce.adb,
libgnarl/s-taprop__linux.adb, libgnarl/s-taprop__mingw.adb,
libgnarl/s-taprop__posix.adb, libgnarl/s-taprop__qnx.adb,
libgnarl/s-taprop__solaris.adb, libgnarl/s-taprop__vxworks.adb,
libgnarl/s-taskin.adb, libgnarl/s-tasque.adb,
libgnarl/s-tpoben.adb, libgnat/a-calend.adb,
libgnat/a-excach.adb, libgnat/a-except.adb, libgnat/a-tags.adb,
libgnat/a-textio.adb, libgnat/a-witeio.adb,
libgnat/a-ztexio.adb, libgnat/g-binenv.adb,
libgnat/s-parame.adb, libgnat/s-parame__vxworks.adb,
libgnat/s-stratt.adb, libgnat/s-trasym__dwarf.adb: Mark imported
binder globals as constant.

diff --git a/gcc/ada/libgnarl/s-intman__android.adb b/gcc/ada/libgnarl/s-intman__android.adb
--- a/gcc/ada/libgnarl/s-intman__android.adb
+++ b/gcc/ada/libgnarl/s-intman__android.adb
@@ -68,7 +68,7 @@ package body System.Interrupt_Management is
Exception_Interrupts : constant Interrupt_List :=
  (SIGFPE, SIGILL, SIGSEGV, SIGBUS);
 
-   Unreserve_All_Interrupts : Interfaces.C.int;
+   Unreserve_All_Interrupts : constant Interfaces.C.int;
pragma Import
  (C, Unreserve_All_Interrupts, "__gl_unreserve_all_interrupts");
 


diff --git a/gcc/ada/libgnarl/s-intman__lynxos.adb b/gcc/ada/libgnarl/s-intman__lynxos.adb
--- a/gcc/ada/libgnarl/s-intman__lynxos.adb
+++ b/gcc/ada/libgnarl/s-intman__lynxos.adb
@@ -68,7 +68,7 @@ package body System.Interrupt_Management is
Exception_Interrupts : constant Interrupt_List :=
  (SIGFPE, SIGILL, SIGSEGV, SIGBUS);
 
-   Unreserve_All_Interrupts : Interfaces.C.int;
+   Unreserve_All_Interrupts : constant Interfaces.C.int;
pragma Import
  (C, Unreserve_All_Interrupts, "__gl_unreserve_all_interrupts");
 


diff --git a/gcc/ada/libgnarl/s-intman__posix.adb b/gcc/ada/libgnarl/s-intman__posix.adb
--- a/gcc/ada/libgnarl/s-intman__posix.adb
+++ b/gcc/ada/libgnarl/s-intman__posix.adb
@@ -68,7 +68,7 @@ package body System.Interrupt_Management is
Exception_Interrupts : constant Interrupt_List :=
  (SIGFPE, SIGILL, SIGSEGV, SIGBUS);
 
-   Unreserve_All_Interrupts : Interfaces.C.int;
+   Unreserve_All_Interrupts : constant Interfaces.C.int;
pragma Import
  (C, Unreserve_All_Interrupts, "__gl_unreserve_all_interrupts");
 


diff --git a/gcc/ada/libgnarl/s-intman__qnx.adb b/gcc/ada/libgnarl/s-intman__qnx.adb
--- a/gcc/ada/libgnarl/s-intman__qnx.adb
+++ b/gcc/ada/libgnarl/s-intman__qnx.adb
@@ -68,7 +68,7 @@ package body System.Interrupt_Management is
Exception_Interrupts : constant Interrupt_List :=
  (SIGFPE, SIGILL, SIGSEGV, SIGBUS);
 
-   Unreserve_All_Interrupts : Interfaces.C.int;
+   Unreserve_All_Interrupts : constant Interfaces.C.int;
pragma Import
  (C, Unreserve_All_Interrupts, "__gl_unreserve_all_interrupts");
 


diff --git a/gcc/ada/libgnarl/s-intman__solaris.adb b/gcc/ada/libgnarl/s-intman__solaris.adb
--- a/gcc/ada/libgnarl/s-intman__solaris.adb
+++ b/gcc/ada/libgnarl/s-intman__solaris.adb
@@ -47,7 +47,7 @@ package body System.Interrupt_Management is
Exception_Interrupts : constant Interrupt_List :=
  (SIGFPE, SIGILL, SIGSEGV, SIGBUS);
 
-   Unreserve_All_Interrupts : Interfaces.C.int;
+   Unreserve_All_Interrupts : constant Interfaces.C.int;
pragma Import
  (C, Unreserve_All_Interrupts, "__gl_unreserve_all_interrupts");
 


diff --git a/gcc/ada/libgnarl/s-intman__susv3.adb b/gcc/ada/libgnarl/s-intman__susv3.adb
--- a/gcc/ada/libgnarl/s-intman__susv3.adb
+++ b/gcc/ada/libgnarl/s-intman__susv3.adb
@@ -56,7 +56,7 @@ package body System.Interrupt_Management is
use Interfaces.C;
use System.OS_Interface;
 
-   Unreserve_All_Interrupts : Interfaces.C.int;
+   Unreserve_All_Interrupts : constant Interfaces.C.int;
pragma Import
  (C, Unreserve_All_Interrupts, "__gl_unreserve_all_interrupts");
 


diff --git a/gcc/ada/libgnarl/s-taprob.adb b/gcc/ada/libgnarl/s-taprob.adb
--- a/gcc/ada/libgnarl/s-taprob.adb
+++ b/gcc/ada/libgnarl/s-taprob.adb
@@ -47,7 +47,7 @@ package body System.Tasking.Protected_Objects is
-- Local Data --

 
-   Locking_Policy : Character;
+   Locking_Policy : constant Character;
pragma Import (C, Locking_Policy, "__gl_locking_policy");
 
-


diff --git a/gcc/ada/libgnarl/s-taprop__hpux-dce.adb b/gcc/ada/libgnarl/s-taprop__hpux-dce.adb
--- a/gcc/ada/libgnarl/s-taprop__hpux-dce.adb
+++ b/gcc/ada/libgnarl/s-taprop__hpux-dce.adb
@@ -87,10 +87,10 @@ package body System.Task_Primitives.Operations is
Unbl

[Ada] RTEMS: use hardware interrupts instead of signals for interrupt handling

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

RTEMS supports attaching interrupt handlers to hardware interrupt
vectors, which is superior to the current approach of attaching handlers
to signals.  Direct attachment of handlers removes the execution
overhead of converting hardware interrupts to signals and their
subsequent propagation to the interrupt manager. It also removes the
limitation of the number of hardware interrupts that can be supported
under the signals model, as RTEMS is limited to 32 signals.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* Makefile.rtl (VxWorks): Rename s-inmaop__vxworks.adb to
s-inmaop__hwint.adb.
(RTEMS): Use s-inmaop__hwint.adb, s-intman__rtems.adb/s,
s-taprop__rtems.adb.
* libgnarl/a-intnam__rtems.ads: Remove signals definitions and
replace with Hardware_Interrupts.
* libgnarl/s-inmaop__vxworks.adb: Rename as...
* libgnarl/s-inmaop__hwint.adb: ... this.
* libgnarl/s-interr__hwint.adb: Remove unnecessary comments.
* libgnarl/s-intman__rtems.ads, libgnarl/s-intman__rtems.adb:
New files.
* libgnarl/s-osinte__rtems.adb: Add RTEMS API bindings.
(Binary_Semaphore_Create, Binary_Semaphore_Delete,
Binary_Semaphore_Obtain, Binary_Semaphore_Release,
Binary_Semaphore_Flush, Interrupt_Connect,
Interrupt_Number_To_Vector): New functions.
* libgnarl/s-osinte__rtems.ads (Num_HW_Interrupts, Signal):
Removed.
(NSIG, Interrupt_Range): New.
(Binary_Semaphore_Create, Binary_Semaphore_Delete,
Binary_Semaphore_Obtain, Binary_Semaphore_Release,
Binary_Semaphore_Flush, Interrupt_Connect,
Interrupt_Number_To_Vector): Remove Import pragma.
* libgnarl/s-taprop__rtems.adb: New file.

patch.diff.gz
Description: application/gzip

[Ada] Fix internal error on fixed-point divide, multiply and scaling

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

This fixes a couple of long-standing oversights in the fixed-point multiply
implementation that were recently copied into the divide implementation and
thus made more visible: when computing the operand size for compile-time
known values, the negative case must be taken into account and comparisons
with powers of 2 must be strict.  The patch also performs some refactoring.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_fixd.adb (Get_Size_For_Value): New function returning a size
suitable for a non-negative integer value.
(Get_Type_For_Size): New function returning a standard type suitable
for a size.
(Build_Divide): Call both functions to compute the result type, but
make sure to pass a non-negative value to the first.
(Build_Multiply): Likewise.
(Do_Multiply_Fixed_Universal): Minor consistency tweak.
(Integer_Literal): Call both functions to compute the type.

diff --git a/gcc/ada/exp_fixd.adb b/gcc/ada/exp_fixd.adb
--- a/gcc/ada/exp_fixd.adb
+++ b/gcc/ada/exp_fixd.adb
@@ -190,6 +190,15 @@ package body Exp_Fixd is
--  The expression returned is neither analyzed nor resolved. The Etype
--  of the result is properly set (to Universal_Real).
 
+   function Get_Size_For_Value (V : Uint) return Pos;
+   --  Given a non-negative universal integer value, return the size of a small
+   --  signed integer type covering -V .. V, or Pos'Max if no such type exists.
+
+   function Get_Type_For_Size (Siz : Pos; Force : Boolean) return Entity_Id;
+   --  Return the smallest signed integer type containing at least Siz bits.
+   --  If no such type exists, return Empty if Force is False or the largest
+   --  signed integer type if Force is True.
+
function Integer_Literal
  (N: Node_Id;
   V: Uint;
@@ -324,7 +333,6 @@ package body Exp_Fixd is
   Right_Type  : constant Entity_Id  := Base_Type (Etype (R));
   Left_Size   : Int;
   Right_Size  : Int;
-  Rsize   : Int;
   Result_Type : Entity_Id;
   Rnode   : Node_Id;
 
@@ -354,20 +362,17 @@ package body Exp_Fixd is
  --  the effective size of an operand is the RM_Size of the operand.
  --  But a special case arises with operands whose size is known at
  --  compile time. In this case, we can use the actual value of the
- --  operand to get its size if it would fit in signed 8/16/32 bits.
+ --  operand to get a size if it would fit in a small signed integer.
 
  Left_Size := UI_To_Int (RM_Size (Left_Type));
 
  if Compile_Time_Known_Value (L) then
 declare
-   Val : constant Uint := Expr_Value (L);
+   Siz : constant Int :=
+   Get_Size_For_Value (UI_Abs (Expr_Value (L)));
 begin
-   if Val < Uint_2 ** 7 then
-  Left_Size := 8;
-   elsif Val < Uint_2 ** 15 then
-  Left_Size := 16;
-   elsif Val < Uint_2 ** 31 then
-  Left_Size := 32;
+   if Siz < Left_Size then
+  Left_Size := Siz;
end if;
 end;
  end if;
@@ -376,35 +381,19 @@ package body Exp_Fixd is
 
  if Compile_Time_Known_Value (R) then
 declare
-   Val : constant Uint := Expr_Value (R);
+   Siz : constant Int :=
+   Get_Size_For_Value (UI_Abs (Expr_Value (R)));
 begin
-   if Val <= Int'(2 ** 7) then
-  Right_Size := 8;
-   elsif Val <= Int'(2 ** 15) then
-  Right_Size := 16;
+   if Siz < Right_Size then
+  Right_Size := Siz;
end if;
 end;
  end if;
 
  --  Do the operation using the longer of the two sizes
 
- Rsize := Int'Max (Left_Size, Right_Size);
-
- if Rsize <= 8 then
-Result_Type := Standard_Integer_8;
-
- elsif Rsize <= 16 then
-Result_Type := Standard_Integer_16;
-
- elsif Rsize <= 32 then
-Result_Type := Standard_Integer_32;
-
- elsif Rsize <= 64 or else System_Max_Integer_Size < 128 then
-Result_Type := Standard_Integer_64;
-
- else
-Result_Type := Standard_Integer_128;
- end if;
+ Result_Type :=
+   Get_Type_For_Size (Int'Max (Left_Size, Right_Size), Force => True);
 
  Rnode :=
 Make_Op_Divide (Loc,
@@ -664,7 +653,6 @@ package body Exp_Fixd is
   Right_Type  : constant Entity_Id  := Etype (R);
   Left_Size   : Int;
   Right_Size  : Int;
-  Rsize   : Int;
   Result_Type : Entity_Id;
   Rnode   : Node_Id;
 
@@ -697,20 +685,17 @@ package body Exp_Fixd is
  --  the effective size of an operand is the RM_Size of the operand.
  --  But a special case arises with operands whose size is known at

[Ada] Find an interpretation for membership test with a singleton value

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

When resolving

   type Color is (Blue, Orange);

   function Get_Color return Color
 is begin return Blue; end Get_Color;
   function Get_Color return String
 is begin return "Blue"; end Get_Color;

   Test : Boolean := Get_Color in Blue;

we did not try all the possible interpretations of Get_Color
but only the latest.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch4.adb (Analyze_Membership_Op): Finds interpretation for
the case of a membership test with a singleton value in case of
overloading.

diff --git a/gcc/ada/sem_ch4.adb b/gcc/ada/sem_ch4.adb
--- a/gcc/ada/sem_ch4.adb
+++ b/gcc/ada/sem_ch4.adb
@@ -2960,6 +2960,13 @@ package body Sem_Ch4 is
   --  If a set of alternatives is present, analyze each and find the
   --  common type to which they must all resolve.
 
+  procedure Find_Interpretation;
+  function Find_Interpretation return Boolean;
+  --  Routine and wrapper to find a matching interpretation in case
+  --  of overloading. The wrapper returns True iff a matching
+  --  interpretation is found. Beware, in absence of overloading,
+  --  using this function will break gnat's bootstrapping.
+
   procedure Try_One_Interp (T1 : Entity_Id);
   --  Routine to try one proposed interpretation. Note that the context
   --  of the operation plays no role in resolving the arguments, so that
@@ -3064,6 +3071,26 @@ package body Sem_Ch4 is
  end if;
   end Analyze_Set_Membership;
 
+  -
+  -- Find_Interpretation --
+  -
+
+  procedure Find_Interpretation is
+  begin
+ Get_First_Interp (L, Index, It);
+ while Present (It.Typ) loop
+Try_One_Interp (It.Typ);
+Get_Next_Interp (Index, It);
+ end loop;
+  end Find_Interpretation;
+
+  function Find_Interpretation return Boolean is
+  begin
+ Find_Interpretation;
+
+ return Found;
+  end Find_Interpretation;
+
   
   -- Try_One_Interp --
   
@@ -3119,11 +3146,7 @@ package body Sem_Ch4 is
 Try_One_Interp (Etype (L));
 
  else
-Get_First_Interp (L, Index, It);
-while Present (It.Typ) loop
-   Try_One_Interp (It.Typ);
-   Get_Next_Interp (Index, It);
-end loop;
+Find_Interpretation;
  end if;
 
   --  If not a range, it can be a subtype mark, or else it is a degenerate
@@ -3139,13 +3162,14 @@ package body Sem_Ch4 is
 Find_Type (R);
 Check_Fully_Declared (Entity (R), R);
 
- elsif Ada_Version >= Ada_2012
-   and then Has_Compatible_Type (R, Etype (L))
+ elsif Ada_Version >= Ada_2012 and then
+   ((Is_Overloaded (L) and then Find_Interpretation) or else
+   (not Is_Overloaded (L) and then Has_Compatible_Type (R, Etype (L))))
  then
 if Nkind (N) = N_In then
-   Op := Make_Op_Eq (Loc, Left_Opnd  => L, Right_Opnd => R);
+   Op := Make_Op_Eq (Loc, Left_Opnd => L, Right_Opnd => R);
 else
-   Op := Make_Op_Ne (Loc, Left_Opnd  => L, Right_Opnd => R);
+   Op := Make_Op_Ne (Loc, Left_Opnd => L, Right_Opnd => R);
 end if;
 
 if Is_Record_Or_Limited_Type (Etype (L)) then

[PATCH] middle-end/102682 - avoid invalid subreg on the LHS

2021-10-11 Thread Richard Biener via Gcc-patches

The following avoids generating

(insn 6 5 7 2 (set (subreg:OI (concatn/v:TI [
(reg:DI 92 [ buffer ])
(reg:DI 93 [ buffer+8 ])
]) 0)
(subreg:OI (reg/v:V8SI 85 [ __x ]) 0)) "t.ii":76:21 74 
{*movoi_internal_avx}
 (nil))

via store_bit_field_1 when we try to store excess data into
a register allocated temporary.  The case was supposed to

  /* Use the subreg machinery either to narrow OP0 to the required
 words...

but the check ensured only a register-aligned but not a large
enough piece.  The following adds such missed check which ends up
decomposing the set to

(insn 6 5 7 (set (subreg:DI (reg/v:TI 84 [ buffer ]) 0)
(subreg:DI (reg/v:V8SI 85 [ __x ]) 0)) "t.ii":76:21 -1
 (nil))

(insn 7 6 0 (set (subreg:DI (reg/v:TI 84 [ buffer ]) 8)
(subreg:DI (reg/v:V8SI 85 [ __x ]) 8)) "t.ii":76:21 -1
 (nil))


Bootstrapped and tested on x86_64-unknown-linux-gnu, OK for trunk?

Thanks,
Richard.

2021-10-11  Richard Biener  

PR middle-end/102682
* expmed.c (store_bit_field_1): Ensure a LHS subreg would
not create a paradoxical subreg.
---
 gcc/expmed.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/expmed.c b/gcc/expmed.c
index 59734d4841c..bbdd0e71d20 100644
--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -806,7 +806,8 @@ store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, 
poly_uint64 bitnum,
}
}
   else if (constant_multiple_p (bitnum, regsize * BITS_PER_UNIT, &regnum)
-  && multiple_p (bitsize, regsize * BITS_PER_UNIT))
+  && multiple_p (bitsize, regsize * BITS_PER_UNIT)
+  && known_ge (GET_MODE_BITSIZE (GET_MODE (op0)), bitsize))
{
  sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 regnum * regsize);
-- 
2.31.1

[Ada] Remove constant arguments

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

All these arguments were identified programmatically as being always
used with the same value (often the default one). As such, they can be
omitted.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* ali.adb (Get_Name): Ignore_Spaces is always False.
* bindo-graphs.adb (Set_Is_Existing_Source_Target_Relation): Val
is always True.
* cstand.adb (New_Standard_Entity): New_Node_Kind is always
N_Defining_Identifier.
* exp_ch3.adb (Predef_Stream_Attr_Spec): For_Body is always
False.
* exp_dist.adb (Add_Parameter_To_NVList): RACW_Ctrl is always
False.
* gnatls.adb (Add_Directories): Prepend is always False.
* sem_ch10.adb, sem_ch10.ads (Load_Needed_Body): Do_Analyze is
always True.
* sem_ch3.adb, sem_ch3.ads (Process_Range_Expr_In_Decl):
R_Check_Off is always False.
* sem_elab.adb: (Info_Variable_Reference): Info_Msg is always
False, In_SPARK is always True.
(Set_Is_Traversed_Body, Set_Is_Saved_Construct,
Set_Is_Saved_Relation): Val is always True.
* treepr.adb (Visit_Descendant): No_Indent is always False.
(Print_Node): Fmt does not need such a big scope.

patch.diff.gz
Description: application/gzip

[Ada] Simplify membership tests with N_Generic_Declaration

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

Use collective N_Generic_Declaration subtype instead of its members
N_Generic_Subprogram_Declaration and N_Generic_Package_Declaration where
reasonable. Code cleanup related to handling of Global contracts in
generic units; semantics is unaffected.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch10.adb, sem_prag.adb, sem_util.adb: Use
N_Generic_Declaration in membership tests.

diff --git a/gcc/ada/sem_ch10.adb b/gcc/ada/sem_ch10.adb
--- a/gcc/ada/sem_ch10.adb
+++ b/gcc/ada/sem_ch10.adb
@@ -4162,8 +4162,7 @@ package body Sem_Ch10 is
   end if;
 
   if Ekind (P_Name) = E_Generic_Package
-and then Nkind (Lib_Unit) not in N_Generic_Subprogram_Declaration
-   | N_Generic_Package_Declaration
+and then Nkind (Lib_Unit) not in N_Generic_Declaration
| N_Generic_Renaming_Declaration
   then
  Error_Msg_N
@@ -6193,9 +6192,7 @@ package body Sem_Ch10 is
   ("subprogram not allowed in `LIMITED WITH` clause", N);
 return;
 
- when N_Generic_Package_Declaration
-| N_Generic_Subprogram_Declaration
- =>
+ when N_Generic_Declaration =>
 Error_Msg_N ("generic not allowed in `LIMITED WITH` clause", N);
 return;
 


diff --git a/gcc/ada/sem_prag.adb b/gcc/ada/sem_prag.adb
--- a/gcc/ada/sem_prag.adb
+++ b/gcc/ada/sem_prag.adb
@@ -6678,9 +6678,7 @@ package body Sem_Prag is
   then
  Pragma_Misplaced;
 
-  elsif (Nkind (Parent_Node) = N_Generic_Package_Declaration
-  or else Nkind (Parent_Node) =
- N_Generic_Subprogram_Declaration)
+  elsif Nkind (Parent_Node) in N_Generic_Declaration
 and then Plist = Generic_Formal_Declarations (Parent_Node)
   then
  Pragma_Misplaced;


diff --git a/gcc/ada/sem_util.adb b/gcc/ada/sem_util.adb
--- a/gcc/ada/sem_util.adb
+++ b/gcc/ada/sem_util.adb
@@ -8030,8 +8030,7 @@ package body Sem_Util is
 
 if Present (Spec_Id)
   and then Nkind (Unit_Declaration_Node (Spec_Id)) in
- N_Generic_Package_Declaration |
- N_Generic_Subprogram_Declaration
+ N_Generic_Declaration
 then
return Par;
 end if;
@@ -8055,9 +8054,7 @@ package body Sem_Util is
begin
   Par := Parent (N);
   while Present (Par) loop
- if Nkind (Par) in N_Generic_Package_Declaration
- | N_Generic_Subprogram_Declaration
- then
+ if Nkind (Par) in N_Generic_Declaration then
 return Par;
 
  elsif Nkind (Par) in N_Package_Body | N_Subprogram_Body then
@@ -8066,9 +8063,7 @@ package body Sem_Util is
 if Present (Spec_Id) then
Spec_Decl := Unit_Declaration_Node (Spec_Id);
 
-   if Nkind (Spec_Decl) in N_Generic_Package_Declaration
- | N_Generic_Subprogram_Declaration
-   then
+   if Nkind (Spec_Decl) in N_Generic_Declaration then
   return Spec_Decl;
end if;
 end if;
@@ -17891,9 +17886,7 @@ package body Sem_Util is
   --  a generic body modifies the Ekind of its spec to allow for recursive
   --  calls.
 
-  return
-Nkind (Spec_Decl) in N_Generic_Package_Declaration
-   | N_Generic_Subprogram_Declaration;
+  return Nkind (Spec_Decl) in N_Generic_Declaration;
end Is_Generic_Declaration_Or_Body;
 
---

[Ada] RTEMS: use regular RTEMS API for minimum stack size calculation

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

Use _POSIX_Threads_Minimum_stack_size instead of
ada_pthread_minimum_stack_size so the runtime does not require the RTEMS
kernel to be configured to have Ada support.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* libgnat/s-parame__rtems.adb: use
_POSIX_Threads_Minimum_stack_size instead of
ada_pthread_minimum_stack_size.

diff --git a/gcc/ada/libgnat/s-parame__rtems.adb b/gcc/ada/libgnat/s-parame__rtems.adb
--- a/gcc/ada/libgnat/s-parame__rtems.adb
+++ b/gcc/ada/libgnat/s-parame__rtems.adb
@@ -35,10 +35,6 @@ with Interfaces.C;
 
 package body System.Parameters is
 
-   function ada_pthread_minimum_stack_size return Interfaces.C.size_t;
-   pragma Import (C, ada_pthread_minimum_stack_size,
- "_ada_pthread_minimum_stack_size");
-
-
-- Adjust_Storage_Size --
-
@@ -61,8 +57,15 @@ package body System.Parameters is

 
function Default_Stack_Size return Size_Type is
+  Default_Stack_Size : constant Integer
+with Import, Convention => C,
+ External_Name => "__gl_default_stack_size";
begin
-  return Size_Type (ada_pthread_minimum_stack_size);
+  if Default_Stack_Size = -1 then
+ return 32 * 1024;
+  else
+ return Size_Type (Default_Stack_Size);
+  end if;
end Default_Stack_Size;
 

@@ -70,9 +73,11 @@ package body System.Parameters is

 
function Minimum_Stack_Size return Size_Type is
-
+  POSIX_Threads_Minimum_stack_size : constant Interfaces.C.size_t
+with Import, Convention => C,
+ External_Name => "_POSIX_Threads_Minimum_stack_size";
begin
-  return Size_Type (ada_pthread_minimum_stack_size);
+  return Size_Type (POSIX_Threads_Minimum_stack_size);
end Minimum_Stack_Size;
 
 end System.Parameters;

[Ada] Incorrect Dynamic_Predicate results for static arguments

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

In determining at run time whether a statically-known discrete value
satisfies the predicate of a subtype where both

   - a Dynamic_Predicate aspect specification applies (directly or
 indirectly) to a subtype; and

   - at least one other predicate aspect specification (that is, either
 a Static_Predicate aspect specification, a GNAT-defined Predicate
 aspect specification, or a second Dynamic_Predicate aspect
 specification) applies (directly or indirectly) to that same
 subtype,

sometimes only the "last" Dynamic_Predicate aspect's condition was
checked; the other predicate aspects were incorrectly ignored. This
could result in a subtype membership test incorrectly yielding a result
of True. This error is corrected.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_ch6.adb (Can_Fold_Predicate_Call): Do not attempt folding
if there is more than one predicate involved. Recall that
predicate aspect specifications are additive, not overriding, and
that there are three different predicate
aspects (Dynamic_Predicate, Static_Predicate, and the
GNAT-defined Predicate aspect). These various ways of
introducing multiple predicates are all checked for.  A new
nested function, Augments_Other_Dynamic_Predicate, is
introduced.
* sem_ch4.adb
(Analyze_Indexed_Component_Form.Process_Function_Call): When
determining whether a name like "X (Some_Discrete_Type)" might
be interpreted as a slice, the answer should be "no" if the
type/subtype name denotes the current instance of type/subtype.

diff --git a/gcc/ada/exp_ch6.adb b/gcc/ada/exp_ch6.adb
--- a/gcc/ada/exp_ch6.adb
+++ b/gcc/ada/exp_ch6.adb
@@ -3143,6 +3143,13 @@ package body Exp_Ch6 is
   function Can_Fold_Predicate_Call (P : Entity_Id) return Boolean is
  Actual : Node_Id;
 
+ function Augments_Other_Dynamic_Predicate (DP_Aspect_Spec : Node_Id)
+   return Boolean;
+ --  Given a Dynamic_Predicate aspect aspecification for a
+ --  discrete type, returns True iff another DP specification
+ --  applies (indirectly, via a subtype type or a derived type)
+ --  to the same entity that this aspect spec applies to.
+
  function May_Fold (N : Node_Id) return Traverse_Result;
  --  The predicate expression is foldable if it only contains operators
  --  and literals. During this check, we also replace occurrences of
@@ -3150,6 +3157,36 @@ package body Exp_Ch6 is
  --  value of the actual. This is done on a copy of the analyzed
  --  expression for the predicate.
 
+ --
+ -- Augments_Other_Dynamic_Predicate --
+ --
+
+ function Augments_Other_Dynamic_Predicate (DP_Aspect_Spec : Node_Id)
+   return Boolean
+ is
+Aspect_Bearer : Entity_Id := Entity (DP_Aspect_Spec);
+ begin
+loop
+   Aspect_Bearer := Nearest_Ancestor (Aspect_Bearer);
+
+   if not Present (Aspect_Bearer) then
+  return False;
+   end if;
+
+   declare
+  Aspect_Spec : constant Node_Id :=
+Find_Aspect (Aspect_Bearer, Aspect_Dynamic_Predicate);
+   begin
+  if Present (Aspect_Spec)
+and then Aspect_Spec /= DP_Aspect_Spec
+  then
+ --  Found another Dynamic_Predicate aspect spec
+ return True;
+  end if;
+   end;
+end loop;
+ end Augments_Other_Dynamic_Predicate;
+
  --
  -- May_Fold --
  --
@@ -3192,7 +3229,7 @@ package body Exp_Ch6 is
 
  function Try_Fold is new Traverse_Func (May_Fold);
 
- --  Other lLocal variables
+ --  Other Local variables
 
  Subt   : constant Entity_Id := Etype (First_Entity (P));
  Aspect : Node_Id;
@@ -3220,6 +3257,11 @@ package body Exp_Ch6 is
or else Nkind (Actual) /= N_Integer_Literal
or else not Has_Dynamic_Predicate_Aspect (Subt)
or else No (Aspect)
+
+   --  Do not fold if multiple applicable predicate aspects
+   or else Present (Find_Aspect (Subt, Aspect_Static_Predicate))
+   or else Present (Find_Aspect (Subt, Aspect_Predicate))
+   or else Augments_Other_Dynamic_Predicate (Aspect)
or else CodePeer_Mode
  then
 return False;


diff --git a/gcc/ada/sem_ch4.adb b/gcc/ada/sem_ch4.adb
--- a/gcc/ada/sem_ch4.adb
+++ b/gcc/ada/sem_ch4.adb
@@ -2534,6 +2534,7 @@ package body Sem_Ch4 is
   and then Is_Entity_Name (Actual)
   and then Is_Type (Entity (Actual))
   and then Is_Discrete_Type (Entity (Actual))
+  and the

[Ada] Warn about conversion with any predefined time types

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

We already had a warning for unchecked conversions that involve the
private type Ada.Calendar.Time, whose representation might differ
between releases and targets of the compiler. Now this warning is
extended to Ada.Real_Time.Time and Ada.Real_Time.Time_Span, which is
similarly non-portable.

Previously the warning message referred to Time with no quotes; now all
the type names are in quotes, both because that's how we refer to entity
names in messages and because it is actually hard to omit the quotes
with the current API for error reporting.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch13.adb (Validate_Unchecked_Conversion): Simplify code
for detecting conversions with Ada.Calendar.Time type and extend
it to similar types in the Ada.Real_Time package.

diff --git a/gcc/ada/sem_ch13.adb b/gcc/ada/sem_ch13.adb
--- a/gcc/ada/sem_ch13.adb
+++ b/gcc/ada/sem_ch13.adb
@@ -17335,8 +17335,32 @@ package body Sem_Ch13 is
is
   Source : Entity_Id;
   Target : Entity_Id;
+
+  procedure Warn_Nonportable (RE : RE_Id);
+  --  Warn if either source or target of the conversion is a predefined
+  --  private type, whose representation might differ between releases and
+  --  targets of the compiler.
+
+  --
+  -- Warn_Nonportable --
+  --
+
+  procedure Warn_Nonportable (RE : RE_Id) is
+  begin
+ if Is_RTE (Source, RE) or else Is_RTE (Target, RE) then
+pragma Assert (Is_Private_Type (RTE (RE)));
+Error_Msg_NE
+  ("?z?representation of & values may change between "
+   & "'G'N'A'T versions", N, RTE (RE));
+ end if;
+  end Warn_Nonportable;
+
+  --  Local variables
+
   Vnode  : Node_Id;
 
+   --  Start of processing for Validate_Unchecked_Conversion
+
begin
   --  Obtain source and target types. Note that we call Ancestor_Subtype
   --  here because the processing for generic instantiation always makes
@@ -17353,6 +17377,18 @@ package body Sem_Ch13 is
  return;
   end if;
 
+  --  Warn if one of the operands is a private type declared in
+  --  Ada.Calendar or Ada.Real_Time. Do not emit a warning when compiling
+  --  GNAT-related sources.
+
+  if Warn_On_Unchecked_Conversion
+and then not In_Predefined_Unit (N)
+  then
+ Warn_Nonportable (RO_CA_Time);
+ Warn_Nonportable (RO_RT_Time);
+ Warn_Nonportable (RE_Time_Span);
+  end if;
+
   --  If we are dealing with private types, then do the check on their
   --  fully declared counterparts if the full declarations have been
   --  encountered (they don't have to be visible, but they must exist).
@@ -17399,32 +17435,6 @@ package body Sem_Ch13 is
  end if;
   end if;
 
-  --  Warn if one of the operands is Ada.Calendar.Time. Do not emit a
-  --  warning when compiling GNAT-related sources.
-
-  if Warn_On_Unchecked_Conversion
-and then not In_Predefined_Unit (N)
-and then RTU_Loaded (Ada_Calendar)
-and then (Chars (Source) = Name_Time
-or else
-  Chars (Target) = Name_Time)
-  then
- --  If Ada.Calendar is loaded and the name of one of the operands is
- --  Time, there is a good chance that this is Ada.Calendar.Time.
-
- declare
-Calendar_Time : constant Entity_Id := Full_View (RTE (RO_CA_Time));
- begin
-pragma Assert (Present (Calendar_Time));
-
-if Source = Calendar_Time or else Target = Calendar_Time then
-   Error_Msg_N
- ("?z?representation of 'Time values may change between "
-  & "'G'N'A'T versions", N);
-end if;
- end;
-  end if;
-
   --  Make entry in unchecked conversion table for later processing by
   --  Validate_Unchecked_Conversions, which will check sizes and alignments
   --  (using values set by the back end where possible). This is only done

[Ada] Valid postconditions incorrectly rejected.

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

For users, 'Old attribute references are only allowed within
postcondition expressions. Internally, the FE may build trees that
transiently (before some subsequent transformation) violate these rules;
this is ok, but these violations were being incorrectly flagged in some
cases. Fix this problem.

The customer's example for this ticket also demonstrates a second
problem.  Exp_Util.Insert_Actions was willing to take an action (e.g.,
the constraint check for an array indexing expression) that contains a
reference to the loop parameter of a
N_Iterated_Component/Element_Association and insert it in the tree
somewhere above that node, so that the reference ends up outside of the
scope of the declaration it refers to. This leads to a bugbox failure
(gigi is understandably unhappy with the resulting malformed tree). Fix
this too.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_attr.adb (Analyze_Attribute_Old_Result): Permit an
attribute reference inside a compiler-generated _Postconditions
procedure. In this case, Subp_Decl is assigned the declaration
of the enclosing subprogram.
* exp_util.adb (Insert_Actions): When climbing up the tree
looking for an insertion point, do not climb past an
N_Iterated_Component/Element_Association, since this could
result in inserting a reference to a loop parameter at a
location outside of the scope of that loop parameter. On the
other hand, be careful to preserve existing behavior in the case
of an N_Component_Association node.
diff --git a/gcc/ada/exp_util.adb b/gcc/ada/exp_util.adb
--- a/gcc/ada/exp_util.adb
+++ b/gcc/ada/exp_util.adb
@@ -7619,8 +7619,18 @@ package body Exp_Util is
| N_Iterated_Component_Association
| N_Iterated_Element_Association
 =>
-   if Nkind (Parent (P)) = N_Aggregate
- and then Present (Loop_Actions (P))
+   if Nkind (Parent (P)) in N_Aggregate | N_Delta_Aggregate
+
+ --  We must not climb up out of an N_Iterated_xxx_Association
+ --  because the actions might contain references to the loop
+ --  parameter. But it turns out that setting the Loop_Actions
+ --  attribute in the case of an N_Component_Association
+ --  when the attribute was not already set can lead to
+ --  (as yet not understood) bugboxes (gcc failures that are
+ --  presumably due to malformed trees). So we don't do that.
+
+ and then (Nkind (P) /= N_Component_Association
+or else Present (Loop_Actions (P)))
then
   if Is_Empty_List (Loop_Actions (P)) then
  Set_Loop_Actions (P, Ins_Actions);


diff --git a/gcc/ada/sem_attr.adb b/gcc/ada/sem_attr.adb
--- a/gcc/ada/sem_attr.adb
+++ b/gcc/ada/sem_attr.adb
@@ -1413,6 +1413,15 @@ package body Sem_Attr is
return;
 end if;
 
+ --  'Old attribute reference ok in a _Postconditions procedure
+
+ elsif Nkind (Prag) = N_Subprogram_Body
+   and then not Comes_From_Source (Prag)
+   and then Nkind (Corresponding_Spec (Prag)) = N_Defining_Identifier
+   and then Chars (Corresponding_Spec (Prag)) = Name_uPostconditions
+ then
+null;
+
  --  Otherwise the placement of the attribute is illegal
 
  else
@@ -1424,6 +1433,15 @@ package body Sem_Attr is
 
  if Nkind (Prag) = N_Aspect_Specification then
 Subp_Decl := Parent (Prag);
+ elsif Nkind (Prag) = N_Subprogram_Body then
+declare
+   Enclosing_Scope : constant Node_Id :=
+ Scope (Corresponding_Spec (Prag));
+begin
+   pragma Assert (Postconditions_Proc (Enclosing_Scope)
+   = Corresponding_Spec (Prag));
+   Subp_Decl := Parent (Parent (Enclosing_Scope));
+end;
  else
 Subp_Decl := Find_Related_Declaration_Or_Body (Prag);
  end if;

[Ada] Runtime transition: System.Threads

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

Rewrite the former System.Threads implementation for AE653 to work on
the new Light runtime for VxWorks7r2Cert.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* libgnat/s-thread.ads: Fix comments.  Remove unused package
imports.
(Thread_Body_Exceptional_Exit): Remove Exception_Occurrence
parameter.
(ATSD): Declare type locally.
* libgnat/s-thread__ae653.adb: Fix comments.  Remove unused
package imports.  Remove package references to Stack_Limit
checking.
(Install_Handler): Remove.
(Set_Sec_Stack): Likewise.
(Thread_Body_Enter): Remove calls to Install_Handler and
Stack_Limit checking.
(Thread_Body_Exceptional_Exit): Remove Exception_Occurrence
parameter.
(Init_RTS): Call local Get_Sec_Stack.  Remove call to
Install_Handler.  Remove references to accessors for
Get_Sec_Stack and Set_Sec_Stack.  Remove OS check.
(Set_Sec_Stack): Remove.
diff --git a/gcc/ada/libgnat/s-thread.ads b/gcc/ada/libgnat/s-thread.ads
--- a/gcc/ada/libgnat/s-thread.ads
+++ b/gcc/ada/libgnat/s-thread.ads
@@ -34,16 +34,13 @@
 
 --  This package is currently implemented for:
 
---VxWorks AE653 rts-cert
---VxWorks AE653 rts-full (not rts-kernel)
+--VxWorks7r2Cert Light
 
-with Ada.Exceptions;
 with Ada.Unchecked_Conversion;
 
 with Interfaces.C;
 
 with System.Secondary_Stack;
-with System.Soft_Links;
 
 package System.Threads is
 
@@ -81,12 +78,15 @@ package System.Threads is
procedure Thread_Body_Leave;
--  Leave thread body (normally), see above for details
 
-   procedure Thread_Body_Exceptional_Exit
- (EO : Ada.Exceptions.Exception_Occurrence);
+   procedure Thread_Body_Exceptional_Exit;
--  Leave thread body (abnormally on exception), see above for details
 
 private
 
-   type ATSD is new System.Soft_Links.TSD;
+   type ATSD is record
+  Sec_Stack_Ptr : SST.SS_Stack_Ptr;
+  --  Pointer of the allocated secondary stack
+
+   end record;
 
 end System.Threads;


diff --git a/gcc/ada/libgnat/s-thread__ae653.adb b/gcc/ada/libgnat/s-thread__ae653.adb
--- a/gcc/ada/libgnat/s-thread__ae653.adb
+++ b/gcc/ada/libgnat/s-thread__ae653.adb
@@ -29,22 +29,19 @@
 --  --
 --
 
---  This is the VxWorks 653 version of this package
+--  This is the VxWorks7r2Cert Light version of this package
 
 pragma Restrictions (No_Tasking);
---  The VxWorks 653 version of this package is intended only for programs
---  which do not use Ada tasking. This restriction ensures that this
---  will be checked by the binder.
+--  The VxWorks7r2Cert Light version of this package is intended only
+--  for programs which do not use Ada tasking. This restriction ensures
+--  that this will be checked by the binder.
 
 with System.Storage_Elements; use System.Storage_Elements;
-with System.OS_Versions; use System.OS_Versions;
 
 package body System.Threads is
 
use Interfaces.C;
 
-   package SSL renames System.Soft_Links;
-
Main_ATSD : aliased ATSD;
--  TSD for environment task
 
@@ -52,21 +49,7 @@ package body System.Threads is
pragma Thread_Local_Storage (Current_ATSD);
--  pragma TLS needed since TaskVarAdd no longer available
 
-   --  Assume guard pages for Helix APEX partitions, but leave
-   --  checking mechanism in for now, in case of surprises. ???
-   Stack_Limit : Address;
-   pragma Import (C, Stack_Limit, "__gnat_stack_limit");
-
-   type Set_Stack_Limit_Proc_Acc is access procedure;
-   pragma Convention (C, Set_Stack_Limit_Proc_Acc);
-
-   Set_Stack_Limit_Hook : Set_Stack_Limit_Proc_Acc;
-   pragma Import (C, Set_Stack_Limit_Hook, "__gnat_set_stack_limit_hook");
-   --  Procedure to be called when a task is created to set stack limit if
-   --  limit checking is used.
-
--  VxWorks specific API
-
ERROR : constant STATUS := Interfaces.C.int (-1);
OK: constant STATUS := Interfaces.C.int (0);
 
@@ -85,13 +68,8 @@ package body System.Threads is
--  It installs System.Threads versions of certain operations of the
--  run-time lib.
 
-   procedure Install_Handler;
-   pragma Import (C, Install_Handler, "__gnat_install_handler");
-
function  Get_Sec_Stack return SST.SS_Stack_Ptr;
 
-   procedure Set_Sec_Stack (Stack : SST.SS_Stack_Ptr);
-
---
-- Thread_Body_Enter --
---
@@ -108,27 +86,14 @@ package body System.Threads is
   ATSD.Sec_Stack_Ptr := Sec_Stack_Ptr;
   SST.SS_Init (ATSD.Sec_Stack_Ptr);
   Current_ATSD := Process_ATSD_Address;
-  Install_Handler;
-
-  --  Assume guard pages for Helix/Vx7, but leave in for now ???
-  --  Initialize stack limit if needed.
 
-  if Current_ATSD /= Main_ATSD'Address
-and then Set_Stack_Limit_Hook /= null
-  then
- Set_Stack_Limit_Hook.all;
-  en

[Ada] Remove redundant guard in expansion of dispatching calls

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

Routines Make_Predefined_Primitive_Specs and
Predefined_Primitive_Bodies, which create predefined primitives for
derived tagged types, are only called when restriction
No_Dispatching_Calls is inactive. There is no need to recheck this
restriction when creating individual primitive operations related to
tasking.

Code cleanup related to handling of dispatching equality in SPARK.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_ch3.adb (Make_Predefined_Primitive_Specs,
Predefined_Primitive_Bodies): Remove guard with restriction
No_Dispatching_Calls.
diff --git a/gcc/ada/exp_ch3.adb b/gcc/ada/exp_ch3.adb
--- a/gcc/ada/exp_ch3.adb
+++ b/gcc/ada/exp_ch3.adb
@@ -10611,11 +10611,9 @@ package body Exp_Ch3 is
   --Disp_Requeue
   --Disp_Timed_Select
 
-  --  Disable the generation of these bodies if No_Dispatching_Calls,
-  --  Ravenscar or ZFP is active.
+  --  Disable the generation of these bodies if Ravenscar or ZFP is active
 
   if Ada_Version >= Ada_2005
-and then not Restriction_Active (No_Dispatching_Calls)
 and then not Restriction_Active (No_Select_Statements)
 and then RTE_Available (RE_Select_Specific_Data)
   then
@@ -11094,8 +11092,7 @@ package body Exp_Ch3 is
 
   --  The interface versions will have null bodies
 
-  --  Disable the generation of these bodies if No_Dispatching_Calls,
-  --  Ravenscar or ZFP is active.
+  --  Disable the generation of these bodies if Ravenscar or ZFP is active
 
   --  In VM targets we define these primitives in all root tagged types
   --  that are not interface types. Done because in VM targets we don't
@@ -11104,7 +11101,6 @@ package body Exp_Ch3 is
or else
  (not Tagged_Type_Expansion
and then Tag_Typ = Root_Type (Tag_Typ)))
-and then not Restriction_Active (No_Dispatching_Calls)
 and then not Restriction_Active (No_Select_Statements)
 and then RTE_Available (RE_Select_Specific_Data)
   then

[Ada] Fix for atomic wrongly rejected on object of discriminated type

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

The reason is that the automatic alignment promotion is not yet performed
in the case where the nominal subtype is of variable size.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* gcc-interface/decl.c (promote_object_alignment): Add GNU_SIZE
parameter and use it for the size of the object if not null.
(gnat_to_gnu_entity) <E_Variable>: Perform the automatic alignment
promotion for objects whose nominal subtype is of variable size.
(gnat_to_gnu_field): Adjust call to promote_object_alignment.
diff --git a/gcc/ada/gcc-interface/decl.c b/gcc/ada/gcc-interface/decl.c
--- a/gcc/ada/gcc-interface/decl.c
+++ b/gcc/ada/gcc-interface/decl.c
@@ -239,7 +239,7 @@ static tree validate_size (Uint, tree, Entity_Id, enum tree_code, bool, bool,
 			   const char *, const char *);
 static void set_rm_size (Uint, tree, Entity_Id);
 static unsigned int validate_alignment (Uint, Entity_Id, unsigned int);
-static unsigned int promote_object_alignment (tree, Entity_Id);
+static unsigned int promote_object_alignment (tree, tree, Entity_Id);
 static void check_ok_for_atomic_type (tree, Entity_Id, bool);
 static tree create_field_decl_from (tree, tree, tree, tree, tree,
 vec);
@@ -897,7 +897,8 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition)
 	   or a reference to another object, and the size of its type is a
 	   constant, set the alignment to the smallest one which is not
 	   smaller than the size, with an appropriate cap.  */
-	if (!gnu_size && align == 0
+	if (!Known_Esize (gnat_entity)
+	&& !Known_Alignment (gnat_entity)
 	&& (Is_Full_Access (gnat_entity)
 		|| (!Optimize_Alignment_Space (gnat_entity)
 		&& kind != E_Exception
@@ -908,8 +909,8 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition)
 		&& !imported_p
 		&& No (gnat_renamed_obj)
 		&& No (Address_Clause (gnat_entity
-	&& TREE_CODE (TYPE_SIZE (gnu_type)) == INTEGER_CST)
-	  align = promote_object_alignment (gnu_type, gnat_entity);
+	&& (TREE_CODE (TYPE_SIZE (gnu_type)) == INTEGER_CST || gnu_size))
+	  align = promote_object_alignment (gnu_type, gnu_size, gnat_entity);
 
 	/* If the object is set to have atomic components, find the component
 	   type and validate it.
@@ -7322,7 +7323,7 @@ gnat_to_gnu_field (Entity_Id gnat_field, tree gnu_record_type, int packed,
   if (Is_Full_Access (gnat_field))
 {
   const unsigned int align
-	= promote_object_alignment (gnu_field_type, gnat_field);
+	= promote_object_alignment (gnu_field_type, NULL_TREE, gnat_field);
   if (align > 0)
 	gnu_field_type
 	  = maybe_pad_type (gnu_field_type, NULL_TREE, align, gnat_field,
@@ -9393,11 +9394,11 @@ validate_alignment (Uint alignment, Entity_Id gnat_entity, unsigned int align)
   return align;
 }
 
-/* Promote the alignment of GNU_TYPE corresponding to GNAT_ENTITY.  Return
-   a positive value on success or zero on failure.  */
+/* Promote the alignment of GNU_TYPE for an object with GNU_SIZE corresponding
+   to GNAT_ENTITY.  Return a positive value on success or zero on failure.  */
 
 static unsigned int
-promote_object_alignment (tree gnu_type, Entity_Id gnat_entity)
+promote_object_alignment (tree gnu_type, tree gnu_size, Entity_Id gnat_entity)
 {
   unsigned int align, size_cap, align_cap;
 
@@ -9418,14 +9419,17 @@ promote_object_alignment (tree gnu_type, Entity_Id gnat_entity)
   align_cap = get_mode_alignment (ptr_mode);
 }
 
+  if (!gnu_size)
+gnu_size = TYPE_SIZE (gnu_type);
+
   /* Do the promotion within the above limits.  */
-  if (!tree_fits_uhwi_p (TYPE_SIZE (gnu_type))
-  || compare_tree_int (TYPE_SIZE (gnu_type), size_cap) > 0)
+  if (!tree_fits_uhwi_p (gnu_size)
+  || compare_tree_int (gnu_size, size_cap) > 0)
 align = 0;
-  else if (compare_tree_int (TYPE_SIZE (gnu_type), align_cap) > 0)
+  else if (compare_tree_int (gnu_size, align_cap) > 0)
 align = align_cap;
   else
-align = ceil_pow2 (tree_to_uhwi (TYPE_SIZE (gnu_type)));
+align = ceil_pow2 (tree_to_uhwi (gnu_size));
 
   /* But make sure not to under-align the object.  */
   if (align <= TYPE_ALIGN (gnu_type))

[Ada] Tweak the warning about missing local raises

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

This prevents the warning from being given when there may still be regular
exception handlers in the code, although some of them have been turned into
local raises, by querying the predicate that determines whether such regular
handlers are removed or not in the front-end.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* gcc-interface/trans.c (gnat_to_gnu) <N_Pop_Constraint_Error_Label>:
Give the warning only if No_Exception_Propagation is active.
<N_Pop_Storage_Error_Label>: Likewise.
<N_Pop_Program_Error_Label>: Likewise.
diff --git a/gcc/ada/gcc-interface/trans.c b/gcc/ada/gcc-interface/trans.c
--- a/gcc/ada/gcc-interface/trans.c
+++ b/gcc/ada/gcc-interface/trans.c
@@ -7872,21 +7872,24 @@ gnat_to_gnu (Node_Id gnat_node)
 case N_Pop_Constraint_Error_Label:
   gnat_temp = gnu_constraint_error_label_stack.pop ();
   if (Present (gnat_temp)
-	  && !TREE_USED (gnat_to_gnu_entity (gnat_temp, NULL_TREE, false)))
+	  && !TREE_USED (gnat_to_gnu_entity (gnat_temp, NULL_TREE, false))
+	  && No_Exception_Propagation_Active ())
 	Warn_If_No_Local_Raise (gnat_temp);
   break;
 
 case N_Pop_Storage_Error_Label:
   gnat_temp = gnu_storage_error_label_stack.pop ();
   if (Present (gnat_temp)
-	  && !TREE_USED (gnat_to_gnu_entity (gnat_temp, NULL_TREE, false)))
+	  && !TREE_USED (gnat_to_gnu_entity (gnat_temp, NULL_TREE, false))
+	  && No_Exception_Propagation_Active ())
 	Warn_If_No_Local_Raise (gnat_temp);
   break;
 
 case N_Pop_Program_Error_Label:
   gnat_temp = gnu_program_error_label_stack.pop ();
   if (Present (gnat_temp)
-	  && !TREE_USED (gnat_to_gnu_entity (gnat_temp, NULL_TREE, false)))
+	  && !TREE_USED (gnat_to_gnu_entity (gnat_temp, NULL_TREE, false))
+	  && No_Exception_Propagation_Active ())
 	Warn_If_No_Local_Raise (gnat_temp);
   break;

[Ada] Fix problematic import of type-generic GCC atomic builtin

2021-10-11 Thread Pierre-Marie de Rodat via Gcc-patches

This implements the support for most type-generic GCC atomic builtins.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* gcc-interface/gigi.h (resolve_atomic_size): Declare.
(list_third): New inline function.
* gcc-interface/decl.c (type_for_atomic_builtin_p): New function.
(resolve_atomic_builtin): Likewise.
(gnat_to_gnu_subprog_type): Perform type resolution for most of
type-generic GCC atomic builtins and give an error for the rest.
* gcc-interface/utils2.c (resolve_atomic_size): Make public.
diff --git a/gcc/ada/gcc-interface/decl.c b/gcc/ada/gcc-interface/decl.c
--- a/gcc/ada/gcc-interface/decl.c
+++ b/gcc/ada/gcc-interface/decl.c
@@ -241,6 +241,8 @@ static void set_rm_size (Uint, tree, Entity_Id);
 static unsigned int validate_alignment (Uint, Entity_Id, unsigned int);
 static unsigned int promote_object_alignment (tree, tree, Entity_Id);
 static void check_ok_for_atomic_type (tree, Entity_Id, bool);
+static bool type_for_atomic_builtin_p (tree);
+static tree resolve_atomic_builtin (enum built_in_function, tree);
 static tree create_field_decl_from (tree, tree, tree, tree, tree,
 vec);
 static tree create_rep_part (tree, tree, tree);
@@ -6312,14 +6314,106 @@ gnat_to_gnu_subprog_type (Entity_Id gnat_subprog, bool definition,
 	 the checker is expected to post diagnostics in this case.  */
 	  if (gnu_builtin_decl)
 	{
-	  const intrin_binding_t inb
-		= { gnat_subprog, gnu_type, TREE_TYPE (gnu_builtin_decl) };
-
-	  if (!intrin_profiles_compatible_p (&inb))
-		post_error
-		  ("??profile of& doesn''t match the builtin it binds!",
-		   gnat_subprog);
-	  return gnu_builtin_decl;
+	  if (fndecl_built_in_p (gnu_builtin_decl, BUILT_IN_NORMAL))
+		{
+		  const enum built_in_function fncode
+		= DECL_FUNCTION_CODE (gnu_builtin_decl);
+
+		  switch (fncode)
+		  {
+		case BUILT_IN_SYNC_FETCH_AND_ADD_N:
+		case BUILT_IN_SYNC_FETCH_AND_SUB_N:
+		case BUILT_IN_SYNC_FETCH_AND_OR_N:
+		case BUILT_IN_SYNC_FETCH_AND_AND_N:
+		case BUILT_IN_SYNC_FETCH_AND_XOR_N:
+		case BUILT_IN_SYNC_FETCH_AND_NAND_N:
+		case BUILT_IN_SYNC_ADD_AND_FETCH_N:
+		case BUILT_IN_SYNC_SUB_AND_FETCH_N:
+		case BUILT_IN_SYNC_OR_AND_FETCH_N:
+		case BUILT_IN_SYNC_AND_AND_FETCH_N:
+		case BUILT_IN_SYNC_XOR_AND_FETCH_N:
+		case BUILT_IN_SYNC_NAND_AND_FETCH_N:
+		case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N:
+		case BUILT_IN_SYNC_LOCK_TEST_AND_SET_N:
+		case BUILT_IN_ATOMIC_EXCHANGE_N:
+		case BUILT_IN_ATOMIC_LOAD_N:
+		case BUILT_IN_ATOMIC_ADD_FETCH_N:
+		case BUILT_IN_ATOMIC_SUB_FETCH_N:
+		case BUILT_IN_ATOMIC_AND_FETCH_N:
+		case BUILT_IN_ATOMIC_NAND_FETCH_N:
+		case BUILT_IN_ATOMIC_XOR_FETCH_N:
+		case BUILT_IN_ATOMIC_OR_FETCH_N:
+		case BUILT_IN_ATOMIC_FETCH_ADD_N:
+		case BUILT_IN_ATOMIC_FETCH_SUB_N:
+		case BUILT_IN_ATOMIC_FETCH_AND_N:
+		case BUILT_IN_ATOMIC_FETCH_NAND_N:
+		case BUILT_IN_ATOMIC_FETCH_XOR_N:
+		case BUILT_IN_ATOMIC_FETCH_OR_N:
+		  /* This is a generic builtin overloaded on its return
+			 type, so do type resolution based on it.  */
+		  if (!VOID_TYPE_P (gnu_return_type)
+			  && type_for_atomic_builtin_p (gnu_return_type))
+			gnu_builtin_decl
+			  = resolve_atomic_builtin (fncode, gnu_return_type);
+		  else
+			{
+			  post_error
+			("??cannot import type-generic 'G'C'C builtin!",
+			 gnat_subprog);
+			  post_error
+			("\\?use a supported result type",
+			 gnat_subprog);
+			  gnu_builtin_decl = NULL_TREE;
+			}
+		  break;
+
+		case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_N:
+		  /* This is a generic builtin overloaded on its third
+			 parameter type, so do type resolution based on it.  */
+		  if (list_length (gnu_param_type_list) >= 4
+			  && type_for_atomic_builtin_p
+			   (list_third (gnu_param_type_list)))
+			gnu_builtin_decl
+			  = resolve_atomic_builtin
+			  (fncode, list_third (gnu_param_type_list));
+		  else
+			{
+			  post_error
+			("??cannot import type-generic 'G'C'C builtin!",
+			 gnat_subprog);
+			  post_error
+			("\\?use a supported third parameter type",
+			 gnat_subprog);
+			  gnu_builtin_decl = NULL_TREE;
+			}
+		  break;
+
+		case BUILT_IN_SYNC_BOOL_COMPARE_AND_SWAP_N:
+		case BUILT_IN_SYNC_LOCK_RELEASE_N:
+		case BUILT_IN_ATOMIC_STORE_N:
+		  post_error
+			("??unsupported type-generic 'G'C'C builtin!",
+			 gnat_subprog);
+		  gnu_builtin_decl = NULL_TREE;
+		  break;
+
+		default:
+		  break;
+		  }
+		}
+
+	  if (gnu_builtin_decl)
+		{
+		  const intrin_binding_t inb
+		= { gnat_subprog, gnu_type, TREE_TYPE (gnu_builtin_decl) };
+
+		  if (!intrin_profiles_compatible_p (&inb))
+		post_error
+		  ("??profile of& doesn''t match the builtin it binds!",
+		   gnat_subprog);
+
+		  return gnu_builtin_decl;
+		}
 	}
 
 	  /* Inability to find

[PATCH 1/2] OpenMP: Handle reference-typed struct members

2021-10-11 Thread Julian Brown

This patch fixes the baseptrs-3.C test case introduced in the patch:

  https://gcc.gnu.org/pipermail/gcc-patches/2021-October/580729.html

The problematic case concerns OpenMP mapping clauses containing struct
members of reference type, e.g. "mystruct.myref.myptr[:N]".  To be able
to access the array slice through the reference in the middle, we need
to perform an attach action for that reference, since it is represented
internally as a pointer.

I don't think the spec allows for this case explicitly.  The closest
clause is (OpenMP 5.0, "2.19.7.1 map Clause"):

  "If the type of a list item is a reference to a type T then the
   reference in the device data environment is initialized to refer to
   the object in the device data environment that corresponds to the
   object referenced by the list item. If mapping occurs, it occurs as
   though the object were mapped through a pointer with an array section
   of type T and length one."

The patch as it stands allows the mapping to work with just
"mystruct.myref.myptr[:N]", without also requiring an explicit
"mystruct.myref" mapping (it is unclear whether such a mapping would
refer to the hidden pointer used by the reference or to the
automatically-dereferenced data itself). An
attach/detach operation is thus synthesised for the reference.

Tested with offloading to NVPTX and bootstrapped. OK (pending
previously-posted series?).

Julian Brown

2021-10-11  Julian Brown  

gcc/cp/
* semantics.c (finish_omp_clauses): Handle reference-typed members.

gcc/
* gimplify.c (build_struct_group): Arrange for attach/detach nodes to
be created for reference-typed struct members for OpenMP.  Only create
firstprivate_pointer/firstprivate_reference nodes for innermost struct
accesses, those with an optionally-indirected DECL_P base.
(omp_build_struct_sibling_lists): Handle two-element chain for inner
struct component returned from build_struct_group.

libgomp/
* testsuite/libgomp.c++/baseptrs-3.C: Remove XFAILs and extend test.
---
 gcc/cp/semantics.c |   4 +
 gcc/gimplify.c |  56 +--
 libgomp/testsuite/libgomp.c++/baseptrs-3.C | 109 +++--
 3 files changed, 154 insertions(+), 15 deletions(-)

diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index a50ec0ad883..bb8577d0d36 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -7862,6 +7862,8 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type 
ort)
  STRIP_NOPS (t);
  if (TREE_CODE (t) == POINTER_PLUS_EXPR)
t = TREE_OPERAND (t, 0);
+ if (REFERENCE_REF_P (t))
+   t = TREE_OPERAND (t, 0);
}
}
  while (TREE_CODE (t) == COMPONENT_REF);
@@ -7961,6 +7963,8 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type 
ort)
{
  t = TREE_OPERAND (TREE_OPERAND (t, 0), 0);
  indir_component_ref_p = true;
+ if (REFERENCE_REF_P (t))
+   t = TREE_OPERAND (t, 0);
  STRIP_NOPS (t);
  if (TREE_CODE (t) == POINTER_PLUS_EXPR)
t = TREE_OPERAND (t, 0);
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 3d444d1836f..d187dfe1ef2 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -10249,7 +10249,10 @@ build_struct_group (enum omp_region_type region_type, 
enum tree_code code,
   /* FIXME: If we're not mapping the base pointer in some other clause on this
  directive, I think we want to create ALLOC/RELEASE here -- i.e. not
  early-exit.  */
-  if (openmp && attach_detach)
+  if (openmp
+  && attach_detach
+  && !(TREE_CODE (TREE_TYPE (ocd)) == REFERENCE_TYPE
+  && TREE_CODE (TREE_TYPE (TREE_TYPE (ocd))) != POINTER_TYPE))
 return NULL;
 
 #ifdef NOISY_SIBLING_LISTS
@@ -10317,9 +10320,32 @@ build_struct_group (enum omp_region_type region_type, 
enum tree_code code,
 
   tree noind = strip_indirections (base);
 
-  if (!openmp
+  if (openmp
+ && TREE_CODE (TREE_TYPE (noind)) == REFERENCE_TYPE
  && (region_type & ORT_TARGET)
  && TREE_CODE (noind) == COMPONENT_REF)
+   {
+ tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (grp_end),
+ OMP_CLAUSE_MAP);
+ OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_TO);
+ OMP_CLAUSE_DECL (c2) = unshare_expr (base);
+ OMP_CLAUSE_SIZE (c2) = TYPE_SIZE_UNIT (TREE_TYPE (noind));
+
+ tree c3 = build_omp_clause (OMP_CLAUSE_LOCATION (grp_end),
+ OMP_CLAUSE_MAP);
+ OMP_CLAUSE_SET_MAP_KIND (c3, GOMP_MAP_ATTACH_DETACH);
+ OMP_CLAUSE_DECL (c3) = unshare_expr (noind);
+ OMP_CLAUSE_SIZE (c3) = size_zero_node;
+
+ OMP_CLAUSE_CHAIN (c2) = c3;
+ OMP_CLAUSE_CHAIN (c3) = NULL_TREE;
+
+ *inner = c2;

[PATCH 2/2] OpenACC: Make deep-copy-arrayofstruct.c a libgomp/runtime test

2021-10-11 Thread Julian Brown

I noticed that the test in question now compiles properly, and in fact
runs properly too.  Thus it's more useful as a runtime test than a
passing compilation test that otherwise doesn't do much.  This patch
moves it to libgomp.

Tested with offloading to NVPTX and bootstrapped. (I can probably
self-approve as a testsuite-only change, but the patch depends on
previously-posted series).

Julian

2021-10-11  Julian Brown  

gcc/testsuite/
* c-c++-common/goacc/deep-copy-arrayofstruct.c: Move test from
here.

libgomp/
* testsuite/libgomp.oacc-c-c++-common/deep-copy-arrayofstruct.c: Move
test to here.
---
 .../libgomp.oacc-c-c++-common}/deep-copy-arrayofstruct.c| 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 rename {gcc/testsuite/c-c++-common/goacc => 
libgomp/testsuite/libgomp.oacc-c-c++-common}/deep-copy-arrayofstruct.c (98%)

diff --git a/gcc/testsuite/c-c++-common/goacc/deep-copy-arrayofstruct.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-arrayofstruct.c
similarity index 98%
rename from gcc/testsuite/c-c++-common/goacc/deep-copy-arrayofstruct.c
rename to libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-arrayofstruct.c
index 4247607b61c..a11c64749cc 100644
--- a/gcc/testsuite/c-c++-common/goacc/deep-copy-arrayofstruct.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-arrayofstruct.c
@@ -1,4 +1,4 @@
-/* { dg-do compile } */
+/* { dg-do run } */
 
 #include 
 #include 
-- 
2.29.2

Re: [PATCH 06/13] arm: Fix mve_vmvnq_n_ argument mode

2021-10-11 Thread Richard Sandiford via Gcc-patches

Christophe Lyon via Gcc-patches  writes:
> The vmvnq_n* intrinsics have [u]int[16|32]_t arguments, so use the
> <V_elem> iterator instead of HI in mve_vmvnq_n_.
>
> 2021-09-03  Christophe Lyon  
>
>   gcc/
>   * config/arm/mve.md (mve_vmvnq_n_): Use V_elem mode
>   for operand 1.
>
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index e393518ea88..14d17060290 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -617,7 +617,7 @@ (define_insn "mve_vcvtaq_"
>  (define_insn "mve_vmvnq_n_"
>[
> (set (match_operand:MVE_5 0 "s_register_operand" "=w")
> - (unspec:MVE_5 [(match_operand:HI 1 "immediate_operand" "i")]
> + (unspec:MVE_5 [(match_operand: 1 "immediate_operand" "i")]
>VMVNQ_N))
>]
>"TARGET_HAVE_MVE"

I agree this is correct, but there's also the issue that the
predicate is too broad.  At the moment it allows any immediate,
so things like:

  #include <arm_mve.h>
  int32x4_t foo(void) { return vmvnq_n_s32(0x12345678); }

are accepted by the compiler and only rejected by the assembler.
Not your bug to fix, just saying :-)

Thanks,
Richard

Re: [PATCH] options: Fix variable tracking option processing.

2021-10-11 Thread Richard Biener via Gcc-patches

On Mon, Oct 11, 2021 at 3:21 PM Martin Liška  wrote:
>
> On 10/11/21 15:05, Richard Biener wrote:
> >> +  if (!opts_set->x_flag_var_tracking)
> >> +opts->x_flag_var_tracking = optimize >= 1;
> > That's still not equivalent to the old code for -fvar-tracking-uninit which
> > sets opts->x_flag_var_tracking to 1 and the old code checked that
> > for AUTOINIT_VALUE but you override it here for -O0.
> >
>
> Do you mean the newly added code:
>
> +  if (!opts_set->x_flag_var_tracking)
>
> +opts->x_flag_var_tracking = optimize >= 1;
>
>
> that should be equivalent to:
>
> -  if (flag_var_tracking == AUTODETECT_VALUE)
>
> -flag_var_tracking = optimize >= 1;
>
>
> ? Or do I miss something?

Yes.  I think to be equivalent it would need to be

   if (!opts_set->x_flag_var_tracking_uninit
   && !opts_set->x_flag_var_tracking)
 opts->x_flag_var_tracking = optimize >= 1;

see how in the old code the order of the tests makes a difference
because we test flag_* we also set.  Please double-check the change
with regard to that.

Btw, I'd be more comfortable when the move of the code would be
independent of the adjustment to not rely on AUTODETECT_VALUE.
Can we do the latter change first (IIRC the former one failed already)?

Richard.

>
> Thanks,
> Martin

Re: [PATCH v3 1/6] rs6000: Support SSE4.1 "round" intrinsics

2021-10-11 Thread Paul A. Clarke via Gcc-patches

On Fri, Oct 08, 2021 at 05:31:11PM -0500, Segher Boessenkool wrote:
> On Fri, Oct 08, 2021 at 02:27:28PM -0500, Paul A. Clarke wrote:
> > On Fri, Oct 08, 2021 at 12:39:15PM -0500, Segher Boessenkool wrote:
> > I see. Thanks for the reference. If I understand correctly, volatile
> > prevents some optimizations based on the defined inputs/outputs, but
> > the asm could still be subject to reordering.
> 
> "asm volatile" means there is a side effect in the asm.  This means that
> it has to be executed on the real machine the same as on the abstract
> machine, with the side effects in the same order.
> 
> It can still be reordered, modulo those restrictions.  It can be merged
> with an identical asm as well.  And the compiler can split this into two
> identical asms on two paths.

It seems odd to me that the compiler can make any assumptions about
the side-effect(s). How does it know that a side-effect does not alter
computation (as it indeed does in this case), such that reordering is
still correct (which it wouldn't be in this case)?

> In this case you might want a side effect (the instruction writes to
> the FPSCR after all).  But you need this to be tied to the FP code that
> you want the flags to be changed for, and to the restore of the flags,
> and finally you need to prevent other FP code from being scheduled in
> between.
> 
> You need more for that than just volatile, and the solution may well
> make volatile not wanted: tying the insns together somehow will
> naturally make the flags restored to a sane situation again, so the
> whole group can be removed if you want, etc.
> 
> > In this particular case, I don't think it's an issue with respect to
> > reordering.  The code in question is:
> > +  __asm__ __volatile__ ("mffsce %0" : "=f" (__fpscr_save.__fr));
> > +  __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
> > 
> > The output (__fpscr_save) is a source for the following assignment,
> > so the order should be respected, no?
> 
> Other FP code can be interleaved, and then do the wrong thing.
> 
> > With respect to volatile, I worry about removing it, because I do
> > indeed need that instruction to execute in order to clear the FPSCR
> > exception enable bits. That side-effect is not otherwise known to the
> > compiler.
> 
> Yes.  But as said above, volatile isn't enough to get this to behave
> correctly.
> 
> The easiest way out is to write this all in one piece of (inline) asm.

Ugh. I really don't want to go there, not just because it's work, but
I think this is a paradigm that should work without needing to drop
fully into asm.

Is there something unique about using an "asm" statement versus using,
say, a builtin like __builtin_mtfsf or a hypothetical __builtin_mffsce?
Very similar methods are used in glibc today. Are those broken?

Would creating a __builtin_mffsce be another solution?

Would adding memory barriers between the FPSCR manipulations and the
code which is bracketed by them be sufficient?

PC

Re: [PATCH] gcov: make profile merging smarter

2021-10-11 Thread Martin Liška


On 10/5/21 12:04, Richard Biener wrote:

On Mon, Oct 4, 2021 at 1:32 PM Martin Liška  wrote:


On 10/4/21 13:16, Richard Biener wrote:

I meant in merge_one_data do not check ->stamp or ->checksum but instead rely
on the counter merging code to detect mismatches (there's read_mismatch and
read_error).  There's multiple things we can do when we run into those:

   - when we did not actually merge any counter yet we could issue the
 warning as before and drop the old data on the floor
   - when we _did_ merge some counters already we could hard-error
 (I suppose we can't roll-back merging that took place already)
   - we could do the merging two-stage, first see whether the data matches
 and only if it did perform the merging


I've got your point, you are basically suggesting a fine grained merging
(function based). Huh, I don't like it much as it's typically a mistake
in the build setup that 2 objects (with a different checksum) want to emit
profile to the same .gcda file.


I agree, it's usually a mistake.


My patch handles the obvious situation where an object file is built exactly
the same way (so no e.g. -O0 and -O2).


Yeah, but then the two profiles may not be related at all ...


Well, it's quite a common case that one object file is then linked into multiple
binaries (e.g. util.o in a project). We collect also sum_max:
Sum of individual run max values.
which helps handling such a situation.





Note that all of the changes (including yours) have user-visible effects and
the behavior is somewhat unobvious.  Not merging when the object was
re-built is indeed the most obvious behavior so I'm not sure it's a good
idea.  A new env variable to say whether to simply keep the _old_ data
when merging in new data isn't possible would be another "fix" I guess?


Even for a situation when checksum matches, but the timestamp is different?
Sure, we can provide env. variables that can tweak the behavior.


I suppose another distinguishing factor might be the name of the executable.


Well, at compile time, we don't know the name of the final executable.



But yeah, in the end it's a fishy area ...

So I guess your originally posted patch might be the best way to go - can you
try to amend the documentation as for the behavior with respect to
re-compiling and profile merging?  I suppose that if you re-compile just
a single .o you currently merge into all the other .o file counters but _not_
into the newly compiled old counters.


Yes, I can update the documentation.


That would make coverage off
as well for incremental re-compiling?


Yes.



I only can find

@item
Run the program on a representative workload to generate the arc profile
information.  This may be repeated any number of times.  You can run
concurrent instances of your program, and provided that the file system
supports locking, the data files will be correctly updated.  Unless
a strict ISO C dialect option is in effect, @code{fork} calls are
detected and correctly handled without double counting.

but that's under -coverage, not sure if there's a better place to amend.

Note I see there's -fprofile-dir which eventually can be used to "fix"
the SPEC issue as well?


We would have to provide a different option value of -fprofile-dir for both
binaries. That's something we can't easily do in a SPEC config file.

Let me update the documentation bits.

Martin



Richard.


Cheers,
Martin

Re: [PATCH 07/13] arm: Implement MVE predicates as vectors of booleans

2021-10-11 Thread Richard Sandiford via Gcc-patches

Christophe Lyon via Gcc-patches  writes:
> This patch implements support for vectors of booleans to support MVE
> predicates, instead of HImode.  Since the ABI mandates pred16_t (aka
> uint16_t) to represent predicates in intrinsics prototypes, we
> introduce a new "predicate" type qualifier so that we can map relevant
> builtins HImode arguments and return value to the appropriate vector
> of booleans (VxBI).
>
> We have to update test_vector_ops_duplicate, because it iterates using
> an offset in bytes, where we would need to iterate in bits: we stop
> iterating when we reach the end of the vector of booleans.
>
> 2021-09-01  Christophe Lyon  
>
>   gcc/
>   PR target/100757
>   PR target/101325
>   * config/arm/arm-builtins.c (arm_type_qualifiers): Add 
> qualifier_predicate.
>   (arm_init_simd_builtin_types): Add new simd types.
>   (arm_init_builtin): Map predicate vectors arguments to HImode.
>   (arm_expand_builtin_args): Move HImode predicate arguments to VxBI
>   rtx. Move return value to HImode rtx.
>   * config/arm/arm-modes.def (V16BI, V8BI, V4BI): New modes.
>   * config/arm/arm-simd-builtin-types.def (Pred1x16_t,
>   Pred2x8_t,Pred4x4_t): New.
>   * simplify-rtx.c (test_vector_ops_duplicate): Avoid going past the
>   end of the test vector.
>
> diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
> index 3a9ff8f26b8..771759f0cdd 100644
> --- a/gcc/config/arm/arm-builtins.c
> +++ b/gcc/config/arm/arm-builtins.c
> @@ -92,7 +92,9 @@ enum arm_type_qualifiers
>qualifier_lane_pair_index = 0x1000,
>/* Lane indices selected in quadtuplets - must be within range of previous
>   argument = a vector.  */
> -  qualifier_lane_quadtup_index = 0x2000
> +  qualifier_lane_quadtup_index = 0x2000,
> +  /* MVE vector predicates.  */
> +  qualifier_predicate = 0x4000
>  };
>  
>  /*  The qualifier_internal allows generation of a unary builtin from
> @@ -1633,6 +1635,13 @@ arm_init_simd_builtin_types (void)
>arm_simd_types[Bfloat16x4_t].eltype = arm_bf16_type_node;
>arm_simd_types[Bfloat16x8_t].eltype = arm_bf16_type_node;
>  
> +  if (TARGET_HAVE_MVE)
> +{
> +  arm_simd_types[Pred1x16_t].eltype = unsigned_intHI_type_node;
> +  arm_simd_types[Pred2x8_t].eltype = unsigned_intHI_type_node;
> +  arm_simd_types[Pred4x4_t].eltype = unsigned_intHI_type_node;
> +}
> +
>for (i = 0; i < nelts; i++)
>  {
>tree eltype = arm_simd_types[i].eltype;
> @@ -1780,6 +1789,11 @@ arm_init_builtin (unsigned int fcode, 
> arm_builtin_datum *d,
>if (qualifiers & qualifier_map_mode)
>   op_mode = d->mode;
>  
> +  /* MVE Predicates use HImode as mandated by the ABI: pred16_t is 
> unsigned
> +  short.  */
> +  if (qualifiers & qualifier_predicate)
> + op_mode = HImode;
> +
>/* For pointers, we want a pointer to the basic type
>of the vector.  */
>if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode))
> @@ -3024,6 +3038,11 @@ arm_expand_builtin_args (rtx target, machine_mode 
> map_mode, int fcode,
>   case ARG_BUILTIN_COPY_TO_REG:
> if (POINTER_TYPE_P (TREE_TYPE (arg[argc])))
>   op[argc] = convert_memory_address (Pmode, op[argc]);
> +
> +   /* MVE uses mve_pred16_t (aka HImode) for vectors of predicates.  
> */
> +   if (GET_MODE_CLASS (mode[argc]) == MODE_VECTOR_BOOL)
> + op[argc] = gen_lowpart (mode[argc], op[argc]);
> +
> /*gcc_assert (GET_MODE (op[argc]) == mode[argc]); */
> if (!(*insn_data[icode].operand[opno].predicate)
> (op[argc], mode[argc]))
> @@ -3229,6 +3248,13 @@ constant_arg:
>else
>  emit_insn (insn);
>  
> +  if (GET_MODE_CLASS (tmode) == MODE_VECTOR_BOOL)
> +{
> +  rtx HItarget = gen_reg_rtx (HImode);
> +  emit_move_insn (HItarget, gen_lowpart (HImode, target));
> +  return HItarget;
> +}
> +
>return target;
>  }
>  
> diff --git a/gcc/config/arm/arm-modes.def b/gcc/config/arm/arm-modes.def
> index a5e74ba3943..b414a709a62 100644
> --- a/gcc/config/arm/arm-modes.def
> +++ b/gcc/config/arm/arm-modes.def
> @@ -84,6 +84,11 @@ VECTOR_MODE (FLOAT, BF, 2);   /* V2BF.  */
>  VECTOR_MODE (FLOAT, BF, 4);   /*  V4BF.  */
>  VECTOR_MODE (FLOAT, BF, 8);   /*  V8BF.  */
>  
> +/* Predicates for MVE.  */
> +VECTOR_BOOL_MODE (V16BI, 16, 2);
> +VECTOR_BOOL_MODE (V8BI, 8, 2);
> +VECTOR_BOOL_MODE (V4BI, 4, 2);
> +
>  /* Fraction and accumulator vector modes.  */
>  VECTOR_MODES (FRACT, 4);  /* V4QQ  V2HQ */
>  VECTOR_MODES (UFRACT, 4); /* V4UQQ V2UHQ */
> diff --git a/gcc/config/arm/arm-simd-builtin-types.def 
> b/gcc/config/arm/arm-simd-builtin-types.def
> index c19a1b6e3eb..d3987985b4c 100644
> --- a/gcc/config/arm/arm-simd-builtin-types.def
> +++ b/gcc/config/arm/arm-simd-builtin-types.def
> @@ -51,3 +51,7 @@
>ENTRY (Bfloat16x2_t, V2BF, none, 32, bfloat16, 20)
>EN

Re: [Patch 4/7, Arm. GCC] Implement target feature macros for PACBTI.

2021-10-11 Thread Richard Earnshaw via Gcc-patches


On 08/10/2021 13:18, Tejas Belagod via Gcc-patches wrote:

Hi,

This patch implements target feature macros when PACBTI is
enabled through the -march option or -mbranch-protection.

Tested on arm-none-eabi. OK for trunk?

2021-10-04  Tejas Belagod  

gcc/ChangeLog:

* config/arm/arm-c.c (arm_cpu_builtins): Define
__ARM_FEATURE_BTI_DEFAULT and __ARM_FEATURE_PAC_DEFAULT.

gcc/testsuite/ChangeLog:

* gcc.target/arm/acle/pacbti-m-predef-2.c: New test.
* gcc.target/arm/acle/pacbti-m-predef-4.c: New test.
* gcc.target/arm/acle/pacbti-m-predef-5.c: New test.



I presume the specification for this is ACLE - please say so rather than 
making me guess.



+  cpp_undef (pfile, "__ARM_FEATURE_BTI_DEFAULT");
+  cpp_undef (pfile, "__ARM_FEATURE_PAC_DEFAULT");
+  if (TARGET_HAVE_PACBTI)
+{
+  builtin_define_with_int_value ("__ARM_FEATURE_BTI_DEFAULT",
+arm_enable_pacbti & 0x1);

My reading of the ACLE specification would suggest this shouldn't be 
defined if it would have a value of 0, but that's not what this code 
does.  I think it would be better to move this outside the 
TARGET_HAVE_PACBTI and use the def_or_undef approach.


+  builtin_define_with_int_value ("__ARM_FEATURE_PAC_DEFAULT",
+arm_enable_pacbti >> 1);

This one is less clear, could the value ever be zero?  I guess exactly 
one of a-key and b-key must be defined and each has a separate bit.


+}
+
+

Not more than one blank line at the end of a block.


diff --git a/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-2.c 
b/gcc/testsuite/gcc.target/arm/acle/pacbti-m-predef-2.c



Given what I've said above, I think you need to also test that 
__ARM_FEATURE_BTI_DEFAULT is defined before testing the value (and 
emitting #error if it isn't).


R.

Re: [PATCH 08/13] arm: Implement auto-vectorized MVE comparisons with vectors of boolean predicates

2021-10-11 Thread Richard Sandiford via Gcc-patches

Christophe Lyon via Gcc-patches  writes:
> We make use of qualifier_predicate to describe MVE builtins
> prototypes, restricting to auto-vectorizable vcmp* and vpsel builtins,
> as they are exercised by the tests added earlier in the series.
>
> Special handling is needed for mve_vpselq because it has a v2di
> variant, which has no natural VPR.P0 representation: we keep HImode
> for it.
>
> The vector_compare expansion code is updated to use the right VxBI
> mode instead of HI for the result.
>
> New mov patterns are introduced to handle the new modes.
>
> 2021-09-01  Christophe Lyon 
>
>   gcc/
>   PR target/100757
>   PR target/101325
>   * config/arm/arm-builtins.c (BINOP_PRED_UNONE_UNONE_QUALIFIERS)
>   (BINOP_PRED_NONE_NONE_QUALIFIERS)
>   (TERNOP_NONE_NONE_NONE_PRED_QUALIFIERS)
>   (TERNOP_UNONE_UNONE_UNONE_PRED_QUALIFIERS): New.
>   * config/arm/arm.c (arm_hard_regno_mode_ok): Handle new VxBI
>   modes.
>   (arm_mode_to_pred_mode): New.
>   (arm_expand_vector_compare): Use the right VxBI mode instead of
>   HI.
>   (arm_expand_vcond): Likewise.
>   * config/arm/arm_mve_builtins.def (vcmpneq_, vcmphiq_, vcmpcsq_)
>   (vcmpltq_, vcmpleq_, vcmpgtq_, vcmpgeq_, vcmpeqq_, vcmpneq_f)
>   (vcmpltq_f, vcmpleq_f, vcmpgtq_f, vcmpgeq_f, vcmpeqq_f, vpselq_u)
>   (vpselq_s, vpselq_f): Use new predicated qualifiers.
>   * config/arm/iterators.md (MVE_7): New mode iterator.
>   (MVE_VPRED, MVE_vpred): New attribute iterators.
>   * config/arm/mve.md (@mve_vcmpq_)
>   (@mve_vcmpq_f, @mve_vpselq_)
>   (@mve_vpselq_f): Use MVE_VPRED instead of HI.
>   (@mve_vpselq_v2di): Define separately.
>   (mov): New expander for VxBI modes.
>   (mve_mov): New insn for VxBI modes.
>
> diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
> index 771759f0cdd..6e3638869f1 100644
> --- a/gcc/config/arm/arm-builtins.c
> +++ b/gcc/config/arm/arm-builtins.c
> @@ -469,6 +469,12 @@ 
> arm_binop_unone_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>  #define BINOP_UNONE_UNONE_UNONE_QUALIFIERS \
>(arm_binop_unone_unone_unone_qualifiers)
>  
> +static enum arm_type_qualifiers
> +arm_binop_pred_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> +  = { qualifier_predicate, qualifier_unsigned, qualifier_unsigned };
> +#define BINOP_PRED_UNONE_UNONE_QUALIFIERS \
> +  (arm_binop_pred_unone_unone_qualifiers)
> +
>  static enum arm_type_qualifiers
>  arm_binop_unone_none_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>= { qualifier_unsigned, qualifier_none, qualifier_immediate };
> @@ -487,6 +493,12 @@ 
> arm_binop_unone_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>  #define BINOP_UNONE_NONE_NONE_QUALIFIERS \
>(arm_binop_unone_none_none_qualifiers)
>  
> +static enum arm_type_qualifiers
> +arm_binop_pred_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> +  = { qualifier_predicate, qualifier_none, qualifier_none };
> +#define BINOP_PRED_NONE_NONE_QUALIFIERS \
> +  (arm_binop_pred_none_none_qualifiers)
> +
>  static enum arm_type_qualifiers
>  arm_binop_unone_unone_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>= { qualifier_unsigned, qualifier_unsigned, qualifier_none };
> @@ -558,6 +570,12 @@ 
> arm_ternop_none_none_none_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>  #define TERNOP_NONE_NONE_NONE_UNONE_QUALIFIERS \
>(arm_ternop_none_none_none_unone_qualifiers)
>  
> +static enum arm_type_qualifiers
> +arm_ternop_none_none_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> +  = { qualifier_none, qualifier_none, qualifier_none, qualifier_predicate };
> +#define TERNOP_NONE_NONE_NONE_PRED_QUALIFIERS \
> +  (arm_ternop_none_none_none_pred_qualifiers)
> +
>  static enum arm_type_qualifiers
>  arm_ternop_none_none_imm_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>= { qualifier_none, qualifier_none, qualifier_immediate, 
> qualifier_unsigned };
> @@ -577,6 +595,13 @@ 
> arm_ternop_unone_unone_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>  #define TERNOP_UNONE_UNONE_UNONE_UNONE_QUALIFIERS \
>(arm_ternop_unone_unone_unone_unone_qualifiers)
>  
> +static enum arm_type_qualifiers
> +arm_ternop_unone_unone_unone_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> +  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
> +qualifier_predicate };
> +#define TERNOP_UNONE_UNONE_UNONE_PRED_QUALIFIERS \
> +  (arm_ternop_unone_unone_unone_pred_qualifiers)
> +
>  static enum arm_type_qualifiers
>  arm_ternop_none_none_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>= { qualifier_none, qualifier_none, qualifier_none, qualifier_none };
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index 1222cb0d0fe..5f6637d9a5f 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -25304,7 +25304,7 @@ arm_hard_regno_mode_ok (unsigned int regno, 
> machine_mode mode)
>  return false;
>  
>if (IS_VPR_REGNUM (regno))
> -return mode == HImode;
> +return mode == HImode || mode == V16BImode || mode == V8BImode || mode 
> == V4BImod

Re: [PATCH] gcov: make profile merging smarter

2021-10-11 Thread Martin Liška


On 10/11/21 15:49, Martin Liška wrote:

Let me update the documentation bits.


There's the updated patch.

May I install the patch now?

Thanks,
Martin

From fdeb81a960faa19f75316e279a79c231da212f99 Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Thu, 9 Sep 2021 13:02:24 +0200
Subject: [PATCH] gcov: make profile merging smarter

Support merging of profiles that are built from different .o files
but belong to the same source file. Moreover, a checksum is verified
during profile merging and so we can safely combine such profiles.

	PR gcov-profile/90364

gcc/ChangeLog:

	* coverage.c (build_info): Emit checksum to the global variable.
	(build_info_type): Add new field for checksum.
	(coverage_obj_finish): Pass object_checksum.
	(coverage_init): Use 0 as checksum for .gcno files.
	* gcov-dump.c (dump_gcov_file): Dump also new checksum field.
	* gcov.c (read_graph_file): Read also checksum.
	* doc/invoke.texi: Document the behaviour change.

libgcc/ChangeLog:

	* libgcov-driver.c (merge_one_data): Skip timestamp and verify
	checksums.
	(write_one_data): Write also checksum.
	* libgcov-util.c (read_gcda_file): Read also checksum field.
	* libgcov.h (struct gcov_info): Add new field.

diff --git a/gcc/coverage.c b/gcc/coverage.c
index 10d7f8366cb..4467f1eaa5c 100644
--- a/gcc/coverage.c
+++ b/gcc/coverage.c
@@ -129,16 +129,7 @@ static const char *const ctr_names[GCOV_COUNTERS] = {
 #undef DEF_GCOV_COUNTER
 
 /* Forward declarations.  */
-static void read_counts_file (void);
 static tree build_var (tree, tree, int);
-static void build_fn_info_type (tree, unsigned, tree);
-static void build_info_type (tree, tree);
-static tree build_fn_info (const struct coverage_data *, tree, tree);
-static tree build_info (tree, tree);
-static bool coverage_obj_init (void);
-static vec *coverage_obj_fn
-(vec *, tree, struct coverage_data const *);
-static void coverage_obj_finish (vec *);
 
 /* Return the type node for gcov_type.  */
 
@@ -218,6 +209,9 @@ read_counts_file (void)
   tag = gcov_read_unsigned ();
   bbg_file_stamp = crc32_unsigned (bbg_file_stamp, tag);
 
+  /* Read checksum.  */
+  gcov_read_unsigned ();
+
   counts_hash = new hash_table (10);
   while ((tag = gcov_read_unsigned ()))
 {
@@ -935,6 +929,12 @@ build_info_type (tree type, tree fn_info_ptr_type)
   DECL_CHAIN (field) = fields;
   fields = field;
 
+  /* Checksum.  */
+  field = build_decl (BUILTINS_LOCATION, FIELD_DECL, NULL_TREE,
+		  get_gcov_unsigned_t ());
+  DECL_CHAIN (field) = fields;
+  fields = field;
+
   /* Filename */
   field = build_decl (BUILTINS_LOCATION, FIELD_DECL, NULL_TREE,
 		  build_pointer_type (build_qualified_type
@@ -977,7 +977,7 @@ build_info_type (tree type, tree fn_info_ptr_type)
function info objects.  */
 
 static tree
-build_info (tree info_type, tree fn_ary)
+build_info (tree info_type, tree fn_ary, unsigned object_checksum)
 {
   tree info_fields = TYPE_FIELDS (info_type);
   tree merge_fn_type, n_funcs;
@@ -996,13 +996,19 @@ build_info (tree info_type, tree fn_ary)
   /* next -- NULL */
   CONSTRUCTOR_APPEND_ELT (v1, info_fields, null_pointer_node);
   info_fields = DECL_CHAIN (info_fields);
-  
+
   /* stamp */
   CONSTRUCTOR_APPEND_ELT (v1, info_fields,
 			  build_int_cstu (TREE_TYPE (info_fields),
 	  bbg_file_stamp));
   info_fields = DECL_CHAIN (info_fields);
 
+  /* Checksum.  */
+  CONSTRUCTOR_APPEND_ELT (v1, info_fields,
+			  build_int_cstu (TREE_TYPE (info_fields),
+	  object_checksum));
+  info_fields = DECL_CHAIN (info_fields);
+
   /* Filename */
   da_file_name_len = strlen (da_file_name);
   filename_string = build_string (da_file_name_len + 1, da_file_name);
@@ -1214,7 +1220,8 @@ coverage_obj_fn (vec *ctor, tree fn,
function objects from CTOR.  Generate the gcov_info initializer.  */
 
 static void
-coverage_obj_finish (vec *ctor)
+coverage_obj_finish (vec *ctor,
+		 unsigned object_checksum)
 {
   unsigned n_functions = vec_safe_length (ctor);
   tree fn_info_ary_type = build_array_type
@@ -1231,7 +1238,7 @@ coverage_obj_finish (vec *ctor)
   varpool_node::finalize_decl (fn_info_ary);
   
   DECL_INITIAL (gcov_info_var)
-= build_info (TREE_TYPE (gcov_info_var), fn_info_ary);
+= build_info (TREE_TYPE (gcov_info_var), fn_info_ary, object_checksum);
   varpool_node::finalize_decl (gcov_info_var);
 }
 
@@ -1300,7 +1307,6 @@ coverage_init (const char *filename)
   strcpy (da_file_name + prefix_len + len, GCOV_DATA_SUFFIX);
 
   bbg_file_stamp = local_tick;
-  
   if (flag_auto_profile)
 read_autofdo_file ();
   else if (flag_branch_probabilities)
@@ -1328,6 +1334,8 @@ coverage_init (const char *filename)
 	  gcov_write_unsigned (GCOV_NOTE_MAGIC);
 	  gcov_write_unsigned (GCOV_VERSION);
 	  gcov_write_unsigned (bbg_file_stamp);
+	  /* Use an arbitrary checksum */
+	  gcov_write_unsigned (0);
 	  gcov_write_string (getpwd ());
 
 	  /* Do not support has_unexecuted_blocks for Ada.  */
@@ -1353,14 +1361,24 @@ coverage_finish (void)
cannot uniquely stam

Re: [PATCH 09/13] arm: Fix vcond_mask expander for MVE (PR target/100757)

2021-10-11 Thread Richard Sandiford via Gcc-patches

Christophe Lyon via Gcc-patches  writes:
> From: Christophe Lyon 
>
> The problem in this PR is that we call VPSEL with a mask of vector
> type instead of HImode. This happens because operand 3 in vcond_mask
> is the pre-computed vector comparison and has vector type.
>
> This patch fixes it by implementing TARGET_VECTORIZE_GET_MASK_MODE,
> returning the appropriate VxBI mode when targeting MVE.  In turn, this
> implies implementing vec_cmp,
> vec_cmpu and vcond_mask_, and we can
> move vec_cmp, vec_cmpu and
> vcond_mask_ back to neon.md since they are not
> used by MVE anymore.  The new * patterns listed above are
> implemented in mve.md since they are only valid for MVE. However this
> may make maintenance/comparison more painful than having all of them
> in vec-common.md.
>
> In the process, we can get rid of the recently added vcond_mve
> parameter of arm_expand_vector_compare.
>
> Compared to neon.md's vcond_mask_ before my "arm:
> Auto-vectorization for MVE: vcmp" patch (r12-834), it keeps the VDQWH
> iterator added in r12-835 (to have V4HF/V8HF support), as well as the
> (! || flag_unsafe_math_optimizations) condition which
> was not present before r12-834 although SF modes were enabled by VDQW
> (I think this was a bug).
>
> Using TARGET_VECTORIZE_GET_MASK_MODE has the advantage that we no
> longer need to generate vpsel with vectors of 0 and 1: the masks are
> now merged via scalar 'ands' instructions operating on 16-bit masks
> after converting the boolean vectors.
>
> In addition, this patch fixes a problem in arm_expand_vcond() where
> the result would be a vector of 0 or 1 instead of operand 1 or 2.
>
> Reducing the number of iterations in pr100757-3.c from 32 to 8, we
> generate the code below:
>
> float a[32];
> float fn1(int d) {
>   float c = 4.0f;
>   for (int b = 0; b < 8; b++)
> if (a[b] != 2.0f)
>   c = 5.0f;
>   return c;
> }
>
> fn1:
>   ldr r3, .L3+48
>   vldr.64 d4, .L3  // q2=(2.0,2.0,2.0,2.0)
>   vldr.64 d5, .L3+8
>   vldrw.32q0, [r3] // q0=a(0..3)
>   addsr3, r3, #16
>   vcmp.f32eq, q0, q2   // cmp a(0..3) == (2.0,2.0,2.0,2.0)
>   vldrw.32q1, [r3] // q1=a(4..7)
>   vmrs r3, P0
>   vcmp.f32eq, q1, q2   // cmp a(4..7) == (2.0,2.0,2.0,2.0)
>   vmrsr2, P0  @ movhi
>   andsr3, r3, r2   // r3=select(a(0..3]) & select(a(4..7))
>   vldr.64 d4, .L3+16   // q2=(5.0,5.0,5.0,5.0)
>   vldr.64 d5, .L3+24
>   vmsr P0, r3
>   vldr.64 d6, .L3+32   // q3=(4.0,4.0,4.0,4.0)
>   vldr.64 d7, .L3+40
>   vpsel q3, q3, q2 // q3=vcond_mask(4.0,5.0)
>   vmov.32 r2, q3[1]// keep the scalar max
>   vmov.32 r0, q3[3]
>   vmov.32 r3, q3[2]
>   vmov.f32s11, s12
>   vmovs15, r2
>   vmovs14, r3
>   vmaxnm.f32  s15, s11, s15
>   vmaxnm.f32  s15, s15, s14
>   vmovs14, r0
>   vmaxnm.f32  s15, s15, s14
>   vmovr0, s15
>   bx  lr
>   .L4:
>   .align  3
>   .L3:
>   .word   1073741824  // 2.0f
>   .word   1073741824
>   .word   1073741824
>   .word   1073741824
>   .word   1084227584  // 5.0f
>   .word   1084227584
>   .word   1084227584
>   .word   1084227584
>   .word   1082130432  // 4.0f
>   .word   1082130432
>   .word   1082130432
>   .word   1082130432
>
> 2021-09-02  Christophe Lyon  
>
>   PR target/100757
>   gcc/
>   * config/arm/arm-protos.h (arm_get_mask_mode): New prototype.
>   (arm_expand_vector_compare): Update prototype.
>   * config/arm/arm.c (TARGET_VECTORIZE_GET_MASK_MODE): New.
>   (arm_vector_mode_supported_p): Add support for VxBI modes.
>   (arm_expand_vector_compare): Remove useless generation of vpsel.
>   (arm_expand_vcond): Fix select operands.
>   (arm_get_mask_mode): New.
>   * config/arm/mve.md (vec_cmp): New.
>   (vec_cmpu): New.
>   (vcond_mask_): New.
>   * config/arm/vec-common.md (vec_cmp)
>   (vec_cmpu): Move to ...
>   * config/arm/neon.md (vec_cmp)
>   (vec_cmpu): ... here
>   and disable for MVE.
>
> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
> index 9b1f61394ad..9e3d71e0c29 100644
> --- a/gcc/config/arm/arm-protos.h
> +++ b/gcc/config/arm/arm-protos.h
> @@ -201,6 +201,7 @@ extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, 
> tree, rtx, tree);
>  extern bool arm_pad_reg_upward (machine_mode, tree, int);
>  #endif
>  extern int arm_apply_result_size (void);
> +extern opt_machine_mode arm_get_mask_mode (machine_mode mode);
>  
>  #endif /* RTX_CODE */
>  
> @@ -372,7 +373,7 @@ extern void arm_emit_coreregs_64bit_shift (enum rtx_code, 
> rtx, rtx, rtx, rtx,
>  extern bool arm_fusion_enabled_p (tune_params::fuse_ops);
>  extern bool arm_valid_symbolic_address_p (rtx);
>  extern bool arm_validize_comparison (rtx *, rtx *, rtx *)

Re: [PATCH 10/13] arm: Convert remaining MVE vcmp builtins to predicate qualifiers

2021-10-11 Thread Richard Sandiford via Gcc-patches

Christophe Lyon via Gcc-patches  writes:
> This is mostly a mechanical change, only tested by the intrinsics
> expansion tests.
>
> 2021-09-02  Christophe Lyon  
>
>   gcc/
>   PR target/100757
>   PR target/101325
>   * config/arm/arm-builtins.c (BINOP_UNONE_NONE_NONE_QUALIFIERS):
>   Delete.
>   (TERNOP_UNONE_NONE_NONE_UNONE_QUALIFIERS): Change to ...
>   (TERNOP_PRED_NONE_NONE_PRED_QUALIFIERS): ... this.
>   (TERNOP_PRED_UNONE_UNONE_PRED_QUALIFIERS): New.
>   * config/arm/arm_mve_builtins.def (vcmp*q_n_, vcmp*q_m_f): Use new
>   predicated qualifiers.
>   * config/arm/mve.md (mve_vcmpq_n_)
>   (mve_vcmp*q_m_f): Use MVE_VPRED instead of HI.

OK, thanks.

Richard

>
> diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
> index 6e3638869f1..b3455d87d4f 100644
> --- a/gcc/config/arm/arm-builtins.c
> +++ b/gcc/config/arm/arm-builtins.c
> @@ -487,12 +487,6 @@ 
> arm_binop_none_none_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>  #define BINOP_NONE_NONE_UNONE_QUALIFIERS \
>(arm_binop_none_none_unone_qualifiers)
>  
> -static enum arm_type_qualifiers
> -arm_binop_unone_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> -  = { qualifier_unsigned, qualifier_none, qualifier_none };
> -#define BINOP_UNONE_NONE_NONE_QUALIFIERS \
> -  (arm_binop_unone_none_none_qualifiers)
> -
>  static enum arm_type_qualifiers
>  arm_binop_pred_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>= { qualifier_predicate, qualifier_none, qualifier_none };
> @@ -553,10 +547,10 @@ 
> arm_ternop_unone_unone_imm_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>(arm_ternop_unone_unone_imm_unone_qualifiers)
>  
>  static enum arm_type_qualifiers
> -arm_ternop_unone_none_none_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> -  = { qualifier_unsigned, qualifier_none, qualifier_none, qualifier_unsigned 
> };
> -#define TERNOP_UNONE_NONE_NONE_UNONE_QUALIFIERS \
> -  (arm_ternop_unone_none_none_unone_qualifiers)
> +arm_ternop_pred_none_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> +  = { qualifier_predicate, qualifier_none, qualifier_none, 
> qualifier_predicate };
> +#define TERNOP_PRED_NONE_NONE_PRED_QUALIFIERS \
> +  (arm_ternop_pred_none_none_pred_qualifiers)
>  
>  static enum arm_type_qualifiers
>  arm_ternop_none_none_none_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> @@ -602,6 +596,13 @@ 
> arm_ternop_unone_unone_unone_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>  #define TERNOP_UNONE_UNONE_UNONE_PRED_QUALIFIERS \
>(arm_ternop_unone_unone_unone_pred_qualifiers)
>  
> +static enum arm_type_qualifiers
> +arm_ternop_pred_unone_unone_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> +  = { qualifier_predicate, qualifier_unsigned, qualifier_unsigned,
> +qualifier_predicate };
> +#define TERNOP_PRED_UNONE_UNONE_PRED_QUALIFIERS \
> +  (arm_ternop_pred_unone_unone_pred_qualifiers)
> +
>  static enum arm_type_qualifiers
>  arm_ternop_none_none_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>= { qualifier_none, qualifier_none, qualifier_none, qualifier_none };
> diff --git a/gcc/config/arm/arm_mve_builtins.def 
> b/gcc/config/arm/arm_mve_builtins.def
> index 58a05e61bd9..91ed2073918 100644
> --- a/gcc/config/arm/arm_mve_builtins.def
> +++ b/gcc/config/arm/arm_mve_builtins.def
> @@ -118,9 +118,9 @@ VAR3 (BINOP_UNONE_UNONE_UNONE, vhaddq_u, v16qi, v8hi, 
> v4si)
>  VAR3 (BINOP_UNONE_UNONE_UNONE, vhaddq_n_u, v16qi, v8hi, v4si)
>  VAR3 (BINOP_UNONE_UNONE_UNONE, veorq_u, v16qi, v8hi, v4si)
>  VAR3 (BINOP_PRED_UNONE_UNONE, vcmphiq_, v16qi, v8hi, v4si)
> -VAR3 (BINOP_UNONE_UNONE_UNONE, vcmphiq_n_, v16qi, v8hi, v4si)
> +VAR3 (BINOP_PRED_UNONE_UNONE, vcmphiq_n_, v16qi, v8hi, v4si)
>  VAR3 (BINOP_PRED_UNONE_UNONE, vcmpcsq_, v16qi, v8hi, v4si)
> -VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_n_, v16qi, v8hi, v4si)
> +VAR3 (BINOP_PRED_UNONE_UNONE, vcmpcsq_n_, v16qi, v8hi, v4si)
>  VAR3 (BINOP_UNONE_UNONE_UNONE, vbicq_u, v16qi, v8hi, v4si)
>  VAR3 (BINOP_UNONE_UNONE_UNONE, vandq_u, v16qi, v8hi, v4si)
>  VAR3 (BINOP_UNONE_UNONE_UNONE, vaddvq_p_u, v16qi, v8hi, v4si)
> @@ -142,17 +142,17 @@ VAR3 (BINOP_UNONE_UNONE_NONE, vbrsrq_n_u, v16qi, v8hi, 
> v4si)
>  VAR3 (BINOP_UNONE_UNONE_IMM, vshlq_n_u, v16qi, v8hi, v4si)
>  VAR3 (BINOP_UNONE_UNONE_IMM, vrshrq_n_u, v16qi, v8hi, v4si)
>  VAR3 (BINOP_UNONE_UNONE_IMM, vqshlq_n_u, v16qi, v8hi, v4si)
> -VAR3 (BINOP_UNONE_NONE_NONE, vcmpneq_n_, v16qi, v8hi, v4si)
> +VAR3 (BINOP_PRED_NONE_NONE, vcmpneq_n_, v16qi, v8hi, v4si)
>  VAR3 (BINOP_PRED_NONE_NONE, vcmpltq_, v16qi, v8hi, v4si)
> -VAR3 (BINOP_UNONE_NONE_NONE, vcmpltq_n_, v16qi, v8hi, v4si)
> +VAR3 (BINOP_PRED_NONE_NONE, vcmpltq_n_, v16qi, v8hi, v4si)
>  VAR3 (BINOP_PRED_NONE_NONE, vcmpleq_, v16qi, v8hi, v4si)
> -VAR3 (BINOP_UNONE_NONE_NONE, vcmpleq_n_, v16qi, v8hi, v4si)
> +VAR3 (BINOP_PRED_NONE_NONE, vcmpleq_n_, v16qi, v8hi, v4si)
>  VAR3 (BINOP_PRED_NONE_NONE, vcmpgtq_, v16qi, v8hi, v4si)
> -VAR3 (BINOP_UNONE_NONE_NONE, vcmpgtq_n_, v16qi, v8hi, v4si)
> +VAR3 (BINOP_PRED_NONE_NONE, vcmpgtq_n_, v16qi, v8hi, v4si)
>  VAR3 (BINOP_PRED_NONE_N

[PATCH] middle-end/101480 - overloaded global new/delete

2021-10-11 Thread Richard Biener via Gcc-patches

The following fixes the issue of ignoring side-effects on memory
from overloaded global new/delete operators by not marking them
as effectively 'const' apart from other explicitly specified
side-effects.

This will cause

FAIL: g++.dg/warn/Warray-bounds-16.C  -std=gnu++1? (test for excess errors)

because we now no longer statically see the initialization loop
never executes because the call to operator new can now clobber 'a.m'.
This seems to be an issue with the warning code and/or ranger so
I'm leaving this FAIL to be addressed as followup.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2021-10-11  Richard Biener  

PR middle-end/101480
* gimple.c (gimple_call_fnspec): Do not mark operator new/delete
as const.

* g++.dg/torture/pr10148.C: New testcase.
---
 gcc/gimple.c   |  4 +-
 gcc/testsuite/g++.dg/torture/pr10148.C | 52 ++
 2 files changed, 54 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/torture/pr10148.C

diff --git a/gcc/gimple.c b/gcc/gimple.c
index bed7ff9e71c..cc7a88e822b 100644
--- a/gcc/gimple.c
+++ b/gcc/gimple.c
@@ -1549,12 +1549,12 @@ gimple_call_fnspec (const gcall *stmt)
   && DECL_IS_OPERATOR_DELETE_P (fndecl)
   && DECL_IS_REPLACEABLE_OPERATOR (fndecl)
   && gimple_call_from_new_or_delete (stmt))
-return ".co ";
+return ". o ";
   /* Similarly operator new can be treated as malloc.  */
   if (fndecl
   && DECL_IS_REPLACEABLE_OPERATOR_NEW_P (fndecl)
   && gimple_call_from_new_or_delete (stmt))
-return "mC";
+return "m ";
   return "";
 }
 
diff --git a/gcc/testsuite/g++.dg/torture/pr10148.C 
b/gcc/testsuite/g++.dg/torture/pr10148.C
new file mode 100644
index 000..ed278f9f8d8
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr10148.C
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+
+#include 
+#include 
+
+static bool flag = false;
+
+class C
+{
+  bool prev;
+
+public:
+  C() : prev(flag)
+  {
+flag = true;
+  }
+
+  ~C() {
+flag = prev;
+  }
+};
+
+void* operator new(size_t size)
+{
+  assert(flag);
+  return malloc(size);
+}
+
+void operator delete(void *p)
+{
+  free(p);
+}
+
+void g(int* p)
+{
+  delete p;
+}
+
+void f()
+{
+  int* p;
+  {
+C c;
+p = new int;
+  }
+  g(p);
+}
+
+int main(int, char**)
+{
+  f();
+}
-- 
2.31.1

Re: [PATCH 12/13] arm: Convert more load/store MVE builtins to predicate qualifiers

2021-10-11 Thread Richard Sandiford via Gcc-patches

Christophe Lyon via Gcc-patches  writes:
> This patch covers a few builtins where we do not use the 
> iterator and thus we cannot use .
>
> However this introduces a problem for the v2di instructions, because
> there is no predicate for this case.  For instance, changing
> STRSBS_P_QUALIFIERS breaks mve_vstrdq_scatter_base_p_v2di.
> Similarly, this patch introduces problems with:
> mve_vldrdq_gather_base_z_v2di
> mve_vldrdq_gather_base_wb_z_v2di
> mve_vldrdq_gather_base_nowb_z_v2di
> mve_vstrdq_scatter_base_wb_p_v2di

I think we should treat HImode as the predicate mode for V2DI,
alongside the others, even though it isn't a vector boolean mode.
Like you say, it'll end up being too much of a special case otherwise.

Thanks,
Richard

> 2021-09-02  Christophe Lyon  
>
>   gcc/
>   PR target/100757
>   PR target/101325
>   * config/arm/arm-builtins.c (STRSBS_P_QUALIFIERS): Use predicate
>   qualifier.
>   (STRSBU_P_QUALIFIERS): Likewise.
>   (LDRGBS_Z_QUALIFIERS): Likewise.
>   (LDRGBU_Z_QUALIFIERS): Likewise.
>   (LDRGBWBXU_Z_QUALIFIERS): Likewise.
>   (LDRGBWBS_Z_QUALIFIERS): Likewise.
>   (LDRGBWBU_Z_QUALIFIERS): Likewise.
>   (STRSBWBS_P_QUALIFIERS): Likewise.
>   (STRSBWBU_P_QUALIFIERS): Likewise.
>   * config/arm/mve.md: Use VxBI instead of HI.
>
> diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
> index 06ff9d2278a..e58580bb828 100644
> --- a/gcc/config/arm/arm-builtins.c
> +++ b/gcc/config/arm/arm-builtins.c
> @@ -738,13 +738,13 @@ arm_strss_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>  static enum arm_type_qualifiers
>  arm_strsbs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>= { qualifier_void, qualifier_unsigned, qualifier_immediate,
> -  qualifier_none, qualifier_unsigned};
> +  qualifier_none, qualifier_predicate};
>  #define STRSBS_P_QUALIFIERS (arm_strsbs_p_qualifiers)
>  
>  static enum arm_type_qualifiers
>  arm_strsbu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>= { qualifier_void, qualifier_unsigned, qualifier_immediate,
> -  qualifier_unsigned, qualifier_unsigned};
> +  qualifier_unsigned, qualifier_predicate};
>  #define STRSBU_P_QUALIFIERS (arm_strsbu_p_qualifiers)
>  
>  static enum arm_type_qualifiers
> @@ -780,13 +780,13 @@ arm_ldrgbu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>  static enum arm_type_qualifiers
>  arm_ldrgbs_z_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>= { qualifier_none, qualifier_unsigned, qualifier_immediate,
> -  qualifier_unsigned};
> +  qualifier_predicate};
>  #define LDRGBS_Z_QUALIFIERS (arm_ldrgbs_z_qualifiers)
>  
>  static enum arm_type_qualifiers
>  arm_ldrgbu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>= { qualifier_unsigned, qualifier_unsigned, qualifier_immediate,
> -  qualifier_unsigned};
> +  qualifier_predicate};
>  #define LDRGBU_Z_QUALIFIERS (arm_ldrgbu_z_qualifiers)
>  
>  static enum arm_type_qualifiers
> @@ -826,7 +826,7 @@ arm_ldrgbwbxu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>  static enum arm_type_qualifiers
>  arm_ldrgbwbxu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>= { qualifier_unsigned, qualifier_unsigned, qualifier_immediate,
> -  qualifier_unsigned};
> +  qualifier_predicate};
>  #define LDRGBWBXU_Z_QUALIFIERS (arm_ldrgbwbxu_z_qualifiers)
>  
>  static enum arm_type_qualifiers
> @@ -842,13 +842,13 @@ arm_ldrgbwbu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>  static enum arm_type_qualifiers
>  arm_ldrgbwbs_z_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>= { qualifier_none, qualifier_unsigned, qualifier_immediate,
> -  qualifier_unsigned};
> +  qualifier_predicate};
>  #define LDRGBWBS_Z_QUALIFIERS (arm_ldrgbwbs_z_qualifiers)
>  
>  static enum arm_type_qualifiers
>  arm_ldrgbwbu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>= { qualifier_unsigned, qualifier_unsigned, qualifier_immediate,
> -  qualifier_unsigned};
> +  qualifier_predicate};
>  #define LDRGBWBU_Z_QUALIFIERS (arm_ldrgbwbu_z_qualifiers)
>  
>  static enum arm_type_qualifiers
> @@ -864,13 +864,13 @@ arm_strsbwbu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>  static enum arm_type_qualifiers
>  arm_strsbwbs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>= { qualifier_unsigned, qualifier_unsigned, qualifier_const,
> -  qualifier_none, qualifier_unsigned};
> +  qualifier_none, qualifier_predicate};
>  #define STRSBWBS_P_QUALIFIERS (arm_strsbwbs_p_qualifiers)
>  
>  static enum arm_type_qualifiers
>  arm_strsbwbu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>= { qualifier_unsigned, qualifier_unsigned, qualifier_const,
> -  qualifier_unsigned, qualifier_unsigned};
> +  qualifier_unsigned, qualifier_predicate};
>  #define STRSBWBU_P_QUALIFIERS (arm_strsbwbu_p_qualifiers)
>  
>  static enum arm_type_qualifiers
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index 2f36d47c800..241195909da 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -7294,7 +7294,7 @@ (define_insn "mve_vstrwq_scatter_base_p_v4si"
>   [(match_operand:V4SI 0 "s_register_operand" "w")
>

Re: [PATCH 13/13] arm: Convert more MVE/CDE builtins to predicate qualifiers

2021-10-11 Thread Richard Sandiford via Gcc-patches

Christophe Lyon via Gcc-patches  writes:
> This patch covers a few non-load/store builtins where we do not use
> the  iterator and thus we cannot use .
>
> We need to update the expected code in cde-mve-full-assembly.c because
> we now use mve_movv16qi instead of movhi to generate the vmsr
> instruction.
>
> 2021-09-02  Christophe Lyon  
>
>   gcc/
>   PR target/100757
>   PR target/101325
>   * config/arm/arm-builtins.c (CX_UNARY_UNONE_QUALIFIERS): Use
>   predicate.
>   (CX_BINARY_UNONE_QUALIFIERS): Likewise.
>   (CX_TERNARY_UNONE_QUALIFIERS): Likewise.
>   (TERNOP_NONE_NONE_NONE_UNONE_QUALIFIERS): Delete.
>   (QUADOP_NONE_NONE_NONE_NONE_UNONE_QUALIFIERS): Delete.
>   (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE_QUALIFIERS): Delete.
>   * config/arm/arm_mve_builtins.def: Use predicated qualifiers.
>   * config/arm/mve.md: Use VxBI instead of HI.
>
>   gcc/testsuite/
>   * gcc.target/arm/acle/cde-mve-full-assembly.c: Remove expected '@ 
> movhi'.

OK, thanks.

Richard

> diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
> index e58580bb828..d725458f1ad 100644
> --- a/gcc/config/arm/arm-builtins.c
> +++ b/gcc/config/arm/arm-builtins.c
> @@ -344,7 +344,7 @@ static enum arm_type_qualifiers
>  arm_cx_unary_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>= { qualifier_none, qualifier_immediate, qualifier_none,
>qualifier_unsigned_immediate,
> -  qualifier_unsigned };
> +  qualifier_predicate };
>  #define CX_UNARY_UNONE_QUALIFIERS (arm_cx_unary_unone_qualifiers)
>  
>  /* T (immediate, T, T, unsigned immediate).  */
> @@ -353,7 +353,7 @@ arm_cx_binary_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>= { qualifier_none, qualifier_immediate,
>qualifier_none, qualifier_none,
>qualifier_unsigned_immediate,
> -  qualifier_unsigned };
> +  qualifier_predicate };
>  #define CX_BINARY_UNONE_QUALIFIERS (arm_cx_binary_unone_qualifiers)
>  
>  /* T (immediate, T, T, T, unsigned immediate).  */
> @@ -362,7 +362,7 @@ arm_cx_ternary_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>= { qualifier_none, qualifier_immediate,
>qualifier_none, qualifier_none, qualifier_none,
>qualifier_unsigned_immediate,
> -  qualifier_unsigned };
> +  qualifier_predicate };
>  #define CX_TERNARY_UNONE_QUALIFIERS (arm_cx_ternary_unone_qualifiers)
>  
>  /* The first argument (return type) of a store should be void type,
> @@ -558,12 +558,6 @@ 
> arm_ternop_none_none_none_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>  #define TERNOP_NONE_NONE_NONE_IMM_QUALIFIERS \
>(arm_ternop_none_none_none_imm_qualifiers)
>  
> -static enum arm_type_qualifiers
> -arm_ternop_none_none_none_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> -  = { qualifier_none, qualifier_none, qualifier_none, qualifier_unsigned };
> -#define TERNOP_NONE_NONE_NONE_UNONE_QUALIFIERS \
> -  (arm_ternop_none_none_none_unone_qualifiers)
> -
>  static enum arm_type_qualifiers
>  arm_ternop_none_none_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>= { qualifier_none, qualifier_none, qualifier_none, qualifier_predicate };
> @@ -616,13 +610,6 @@ 
> arm_quadop_unone_unone_none_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>  #define QUADOP_UNONE_UNONE_NONE_NONE_PRED_QUALIFIERS \
>(arm_quadop_unone_unone_none_none_pred_qualifiers)
>  
> -static enum arm_type_qualifiers
> -arm_quadop_none_none_none_none_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> -  = { qualifier_none, qualifier_none, qualifier_none, qualifier_none,
> -qualifier_unsigned };
> -#define QUADOP_NONE_NONE_NONE_NONE_UNONE_QUALIFIERS \
> -  (arm_quadop_none_none_none_none_unone_qualifiers)
> -
>  static enum arm_type_qualifiers
>  arm_quadop_none_none_none_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>= { qualifier_none, qualifier_none, qualifier_none, qualifier_none,
> @@ -637,13 +624,6 @@ 
> arm_quadop_none_none_none_imm_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>  #define QUADOP_NONE_NONE_NONE_IMM_PRED_QUALIFIERS \
>(arm_quadop_none_none_none_imm_pred_qualifiers)
>  
> -static enum arm_type_qualifiers
> -arm_quadop_unone_unone_unone_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> -  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
> -qualifier_unsigned, qualifier_unsigned };
> -#define QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE_QUALIFIERS \
> -  (arm_quadop_unone_unone_unone_unone_unone_qualifiers)
> -
>  static enum arm_type_qualifiers
>  arm_quadop_unone_unone_unone_unone_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>= { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
> diff --git a/gcc/config/arm/arm_mve_builtins.def 
> b/gcc/config/arm/arm_mve_builtins.def
> index bb79edf83ca..0fb53d866ec 100644
> --- a/gcc/config/arm/arm_mve_builtins.def
> +++ b/gcc/config/arm/arm_mve_builtins.def
> @@ -87,8 +87,8 @@ VAR4 (BINOP_UNONE_UNONE_UNONE, vcreateq_u, v16qi, v8hi, 
> v4si, v2di)
>  VAR4 (BINOP_NONE_UNONE_UNONE, vcreateq_s, v16qi, v8hi, v4si, v2di)
>  VAR3 (BINOP_UNONE_UNONE_IMM, vshrq_

[PATCH] middle-end/102683 - fix .DEFERRED_INIT expansion

2021-10-11 Thread Richard Biener via Gcc-patches

This avoids using an integer type for which we don't have an
appropriate mode when expanding .DEFERRED_INIT to a non-memory
entity.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
Will push after it finishes.

Richard.

2021-10-11  Richard Biener  

PR middle-end/102683
* internal-fn.c (expand_DEFERRED_INIT): Check for mode
availability before building an integer type for storage
purposes.
---
 gcc/internal-fn.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 6bc256832f7..b3638192fb9 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -3074,7 +3074,9 @@ expand_DEFERRED_INIT (internal_fn, gcall *stmt)
   tree init;
   if (tree_fits_uhwi_p (var_size)
  && (init_type == AUTO_INIT_PATTERN
- || !is_gimple_reg_type (var_type)))
+ || !is_gimple_reg_type (var_type))
+ && int_mode_for_size (tree_to_uhwi (var_size) * BITS_PER_UNIT,
+   0).exists ())
{
  unsigned HOST_WIDE_INT total_bytes = tree_to_uhwi (var_size);
  unsigned char *buf = (unsigned char *) xmalloc (total_bytes);
-- 
2.31.1

Re: [PATCH] libstdc++: Fix compare_three_way for constexpr and Clang

2021-10-11 Thread Paul Keir via Gcc-patches

*ping*

From: Paul Keir 
Sent: 03 September 2021 11:31
To: gcc-patches@gcc.gnu.org
Cc: libstd...@gcc.gnu.org
Subject: Re: [PATCH] libstdc++: Fix compare_three_way for constexpr and Clang

*ping*

From: Paul Keir 
Sent: 20 August 2021 21:17
To: gcc-patches@gcc.gnu.org
Cc: libstd...@gcc.gnu.org
Subject: [PATCH] libstdc++: Fix compare_three_way for constexpr and Clang

Hi,

The current compare_three_way implementation makes provision for constant 
evaluation contexts (avoiding reinterpret_cast etc.), but the approach fails 
with Clang; when it compares two const volatile void pointers: "comparison 
between unequal pointers to void has unspecified result". I include a fix and 
test.

Could someone commit the attached patch for me?

Thanks,
Paul

Please consider the environment and think before you print.

The University of the West of Scotland is a registered Scottish charity. 
Charity number SC002520.

This e-mail and any attachment is for authorised use by the intended 
recipient(s) only. It may contain proprietary material, confidential 
information and/or be subject to legal privilege. It should not be copied, 
disclosed to, retained or used by, any other party. If you are not an intended 
recipient then please promptly delete this e-mail and any attachment and all 
copies and inform the sender.

Please note that any views or opinions presented in this email are solely those 
of the author and do not necessarily represent those of the University of the 
West of Scotland.

As a public body, the University of the West of Scotland may be required to 
make available emails as well as other written forms of information as a result 
of a request made under the Freedom of Information (Scotland) Act 2002.

[PATCH] libgomp: alloc* test fixes [PR102628, PR102668]

2021-10-11 Thread Jakub Jelinek via Gcc-patches

Hi!

As reported, the alloc-9.c test and alloc-{1,2,3}.F* and alloc-11.f90
tests fail on powerpc64-linux with -m32.
The reason why it fails just there is that malloc doesn't guarantee there
128-bit alignment (historically glibc guaranteed 2 * sizeof (void *)
alignment from malloc).

There are two separate issues.
One is a thinko on my side.
In this part of alloc-9.c test (copied to alloc-11.f90), we have
2 allocators, a with pool size 1024B and alignment 16B and default fallback
and a2 with pool size 512B and alignment 32B and a as fallback allocator.
We start at no allocations in both at line 194 and do:
  p = (int *) omp_alloc (sizeof (int), a2);
// This succeeds in a2 and needs 4+overhead bytes (which includes the 32B 
alignment)
  p = (int *) omp_realloc (p, 420, a, a2);
// This allocates 420 bytes+overhead in a, with 16B alignment and deallocates 
the above
  q = (int *) omp_alloc (sizeof (int), a);
// This allocates 4+overhead bytes in a, with 16B alignment
  q = (int *) omp_realloc (q, 420, a2, a);
// This allocates 420+overhead in a2 with 32B alignment
  q = (int *) omp_realloc (q, 768, a2, a2);
// This attempts to reallocate, but as there are elevated alignment
// requirements doesn't try to just realloc (even if it wanted to try that
// a2 is almost full, with 512-420-overhead bytes left in it), so it
// tries to alloc in a2, but there is no space left in the pool, falls
// back to a, which already has 420+overhead bytes allocated in it and
// 1024-420-overhead bytes left and so fails too and fails to default
// non-pool allocator that allocates it, but doesn't guarantee alignment
// higher than malloc guarantees.
// But, the test expected 16B alignment.

So, I've slightly lowered the allocation sizes in that part of the test
420->320 and 768 -> 568, so that the last test still fails to allocate
in a2 (568 > 512-320-overhead) but succeeds in a as fallback, which was
the intent of the test.

Another thing is that alloc-1.F90 seems to be a transcription of
libgomp.c-c++-common/alloc-1.c into Fortran, but alloc-1.c had:
  q = (int *) omp_alloc (768, a2);
  if ((((uintptr_t) q) % 16) != 0)
abort ();
  q[0] = 7;
  q[767 / sizeof (int)] = 8;
  r = (int *) omp_alloc (512, a2);
  if ((((uintptr_t) r) % __alignof (int)) != 0)
abort ();
there but Fortran has:
cq = omp_alloc (768_c_size_t, a2)
if (mod (transfer (cq, intptr), 16_c_intptr_t) /= 0) stop 12
call c_f_pointer (cq, q, [768 / c_sizeof (i)])
q(1) = 7
q(768 / c_sizeof (i)) = 8
cr = omp_alloc (512_c_size_t, a2)
if (mod (transfer (cr, intptr), 16_c_intptr_t) /= 0) stop 13
I'm changing the latter to 4_c_intptr_t because other spots in the
testcase do that, Fortran sadly doesn't have c_alignof, but strictly
speaking it isn't correct, __alignof (int) could be on some architectures
smaller than 4.
So probably alloc-1.F90 etc. should also have
! { dg-additional-sources alloc-7.c }
! { dg-prune-output "command-line option '-fintrinsic-modules-path=.*' is valid 
for Fortran but not for C" }
and use get__alignof_int.

Tested on powerpc64-linux with -m32 and -m64, will commit after full
bootstrap/regtest on x86_64-linux and i686-linux.

2021-10-11  Jakub Jelinek  

PR libgomp/102628
PR libgomp/102668
* testsuite/libgomp.c-c++-common/alloc-9.c (main): Decrease
allocation sizes from 420 to 320 and from 768 to 568.
* testsuite/libgomp.fortran/alloc-11.f90: Likewise.
* testsuite/libgomp.fortran/alloc-1.F90: Change expected alignment
for cr from 16 to 4.

--- libgomp/testsuite/libgomp.c-c++-common/alloc-9.c.jj 2021-10-01 
10:32:03.030954011 +0200
+++ libgomp/testsuite/libgomp.c-c++-common/alloc-9.c2021-10-11 
15:34:07.719040377 +0200
@@ -195,25 +195,25 @@ main ()
   if uintptr_t) p) % 32) != 0)
 abort ();
   p[0] = 85;
-  p = (int *) omp_realloc (p, 420, a, a2);
+  p = (int *) omp_realloc (p, 320, a, a2);
   if uintptr_t) p) % 16) != 0 || p[0] != 85)
 abort ();
   p[0] = 5;
-  p[419 / sizeof (int)] = 6;
+  p[319 / sizeof (int)] = 6;
   q = (int *) omp_alloc (sizeof (int), a);
   if uintptr_t) q) % 16) != 0)
 abort ();
   q[0] = 43;
-  q = (int *) omp_realloc (q, 420, a2, a);
+  q = (int *) omp_realloc (q, 320, a2, a);
   if uintptr_t) q) % 32) != 0 || q[0] != 43)
 abort ();
   q[0] = 44;
-  q[419 / sizeof (int)] = 8;
-  q = (int *) omp_realloc (q, 768, a2, a2);
+  q[319 / sizeof (int)] = 8;
+  q = (int *) omp_realloc (q, 568, a2, a2);
   if uintptr_t) q) % 16) != 0 || q[0] != 44)
 abort ();
   q[0] = 7;
-  q[767 / sizeof (int)] = 8;
+  q[567 / sizeof (int)] = 8;
   omp_free (p, omp_null_allocator);
   omp_free (q, a2);
   omp_destroy_allocator (a2);
--- libgomp/testsuite/libgomp.fortran/alloc-11.f90.jj   2021-10-04 
10:16:11.013138378 +0200
+++ libgomp/testsuite/libgomp.fortran/alloc-11.f90  2021-10-11 
15:51:08.938495429 +0200
@@ -230,32 +230,32 @@ program main
   if (mod (TRANSFER (p, iptr), 32) /= 0)

Re: [PATCH 07/11] OpenMP: Fix non-zero attach/detach bias for struct dereferences

2021-10-11 Thread Julian Brown

On Fri, 1 Oct 2021 10:09:05 -0700
Julian Brown  wrote:

> libgomp/
>   * testsuite/libgomp.c++/baseptrs-3.C: Add test (XFAILed for
> now).

This XFAILed test is addressed in the followup patch:

  https://gcc.gnu.org/pipermail/gcc-patches/2021-October/581342.html

Cheers,

Julian

Re: [PATCH 12/13] arm: Convert more load/store MVE builtins to predicate qualifiers

2021-10-11 Thread Richard Sandiford via Gcc-patches

Richard Sandiford via Gcc-patches  writes:
> Christophe Lyon via Gcc-patches  writes:
>> This patch covers a few builtins where we do not use the 
>> iterator and thus we cannot use .
>>
>> However this introduces a problem for the v2di instructions, because
>> there is no predicate for this case.  For instance, changing
>> STRSBS_P_QUALIFIERS breaks mve_vstrdq_scatter_base_p_v2di.
>> Similarly, this patch introduces problems with:
>> mve_vldrdq_gather_base_z_v2di
>> mve_vldrdq_gather_base_wb_z_v2di
>> mve_vldrdq_gather_base_nowb_z_v2di
>> mve_vstrdq_scatter_base_wb_p_v2di
>
> I think we should treat HImode as the predicate mode for V2DI,
> alongside the others, even though it isn't a vector boolean mode.
> Like you say, it'll end up being too much of a special case otherwise.

Actually: couldn't we have a V8BI too?  Sorry, I was getting confused
with something else and thought that that wouldn't be possible for
some reason.

V8BI might not be used as much as the other boolean modes, but it still
seems like conceptually the right thing to do.

Thanks,
Richard

>
> Thanks,
> Richard
>
>> 2021-09-02  Christophe Lyon  
>>
>>  gcc/
>>  PR target/100757
>>  PR target/101325
>>  * config/arm/arm-builtins.c (STRSBS_P_QUALIFIERS): Use predicate
>>  qualifier.
>>  (STRSBU_P_QUALIFIERS): Likewise.
>>  (LDRGBS_Z_QUALIFIERS): Likewise.
>>  (LDRGBU_Z_QUALIFIERS): Likewise.
>>  (LDRGBWBXU_Z_QUALIFIERS): Likewise.
>>  (LDRGBWBS_Z_QUALIFIERS): Likewise.
>>  (LDRGBWBU_Z_QUALIFIERS): Likewise.
>>  (STRSBWBS_P_QUALIFIERS): Likewise.
>>  (STRSBWBU_P_QUALIFIERS): Likewise.
>>  * config/arm/mve.md: Use VxBI instead of HI.
>>
>> diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
>> index 06ff9d2278a..e58580bb828 100644
>> --- a/gcc/config/arm/arm-builtins.c
>> +++ b/gcc/config/arm/arm-builtins.c
>> @@ -738,13 +738,13 @@ arm_strss_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>>  static enum arm_type_qualifiers
>>  arm_strsbs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>>= { qualifier_void, qualifier_unsigned, qualifier_immediate,
>> -  qualifier_none, qualifier_unsigned};
>> +  qualifier_none, qualifier_predicate};
>>  #define STRSBS_P_QUALIFIERS (arm_strsbs_p_qualifiers)
>>  
>>  static enum arm_type_qualifiers
>>  arm_strsbu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>>= { qualifier_void, qualifier_unsigned, qualifier_immediate,
>> -  qualifier_unsigned, qualifier_unsigned};
>> +  qualifier_unsigned, qualifier_predicate};
>>  #define STRSBU_P_QUALIFIERS (arm_strsbu_p_qualifiers)
>>  
>>  static enum arm_type_qualifiers
>> @@ -780,13 +780,13 @@ arm_ldrgbu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>>  static enum arm_type_qualifiers
>>  arm_ldrgbs_z_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>>= { qualifier_none, qualifier_unsigned, qualifier_immediate,
>> -  qualifier_unsigned};
>> +  qualifier_predicate};
>>  #define LDRGBS_Z_QUALIFIERS (arm_ldrgbs_z_qualifiers)
>>  
>>  static enum arm_type_qualifiers
>>  arm_ldrgbu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>>= { qualifier_unsigned, qualifier_unsigned, qualifier_immediate,
>> -  qualifier_unsigned};
>> +  qualifier_predicate};
>>  #define LDRGBU_Z_QUALIFIERS (arm_ldrgbu_z_qualifiers)
>>  
>>  static enum arm_type_qualifiers
>> @@ -826,7 +826,7 @@ arm_ldrgbwbxu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>>  static enum arm_type_qualifiers
>>  arm_ldrgbwbxu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>>= { qualifier_unsigned, qualifier_unsigned, qualifier_immediate,
>> -  qualifier_unsigned};
>> +  qualifier_predicate};
>>  #define LDRGBWBXU_Z_QUALIFIERS (arm_ldrgbwbxu_z_qualifiers)
>>  
>>  static enum arm_type_qualifiers
>> @@ -842,13 +842,13 @@ arm_ldrgbwbu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>>  static enum arm_type_qualifiers
>>  arm_ldrgbwbs_z_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>>= { qualifier_none, qualifier_unsigned, qualifier_immediate,
>> -  qualifier_unsigned};
>> +  qualifier_predicate};
>>  #define LDRGBWBS_Z_QUALIFIERS (arm_ldrgbwbs_z_qualifiers)
>>  
>>  static enum arm_type_qualifiers
>>  arm_ldrgbwbu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>>= { qualifier_unsigned, qualifier_unsigned, qualifier_immediate,
>> -  qualifier_unsigned};
>> +  qualifier_predicate};
>>  #define LDRGBWBU_Z_QUALIFIERS (arm_ldrgbwbu_z_qualifiers)
>>  
>>  static enum arm_type_qualifiers
>> @@ -864,13 +864,13 @@ arm_strsbwbu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>>  static enum arm_type_qualifiers
>>  arm_strsbwbs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>>= { qualifier_unsigned, qualifier_unsigned, qualifier_const,
>> -  qualifier_none, qualifier_unsigned};
>> +  qualifier_none, qualifier_predicate};
>>  #define STRSBWBS_P_QUALIFIERS (arm_strsbwbs_p_qualifiers)
>>  
>>  static enum arm_type_qualifiers
>>  arm_strsbwbu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
>>= { qualifier_unsigned, qualifier_unsigned, qualifier_const,
>> -  qualifier_unsigned, qualifier_unsigned};
>> +  qualifier_unsi

Re: [SVE] [gimple-isel] PR93183 - SVE does not use neg as conditional

2021-10-11 Thread Richard Sandiford via Gcc-patches

Prathamesh Kulkarni  writes:
> On Fri, 8 Oct 2021 at 21:19, Richard Sandiford
>  wrote:
>>
>> Thanks for looking at this.
>>
>> Prathamesh Kulkarni  writes:
>> > Hi,
>> > As mentioned in PR, for the following test-case:
>> >
>> > typedef unsigned char uint8_t;
>> >
>> > static inline uint8_t
>> > x264_clip_uint8(uint8_t x)
>> > {
>> >   uint8_t t = -x;
>> >   uint8_t t1 = x & ~63;
>> >   return (t1 != 0) ? t : x;
>> > }
>> >
>> > void
>> > mc_weight(uint8_t *restrict dst, uint8_t *restrict src, int n)
>> > {
>> >   for (int x = 0; x < n*16; x++)
>> > dst[x] = x264_clip_uint8(src[x]);
>> > }
>> >
>> > -O3 -mcpu=generic+sve generates following code for the inner loop:
>> >
>> > .L3:
>> > ld1bz0.b, p0/z, [x1, x2]
>> > movprfx z2, z0
>> > and z2.b, z2.b, #0xc0
>> > movprfx z1, z0
>> > neg z1.b, p1/m, z0.b
>> > cmpeq   p2.b, p1/z, z2.b, #0
>> > sel z0.b, p2, z0.b, z1.b
>> > st1bz0.b, p0, [x0, x2]
>> > add x2, x2, x4
>> > whilelo p0.b, w2, w3
>> > b.any   .L3
>> >
>> > The sel is redundant since we could conditionally negate z0 based on
>> > the predicate
>> > comparing z2 with 0.
>> >
>> > As suggested in the PR, the attached patch, introduces a new
>> > conditional internal function .COND_NEG, and in gimple-isel replaces
>> > the following sequence:
>> >op2 = -op1
>> >op0 = A cmp B
>> >lhs = op0 ? op1 : op2
>> >
>> > with:
>> >op0 = A inverted_cmp B
>> >lhs = .COND_NEG (op0, op1, op1).
>> >
>> > lhs = .COND_NEG (op0, op1, op1)
>> > implies
>> > lhs = neg (op1) if cond is true OR fall back to op1 if cond is false.
>> >
>> > With patch, it generates the following code-gen:
>> > .L3:
>> > ld1bz0.b, p0/z, [x1, x2]
>> > movprfx z1, z0
>> > and z1.b, z1.b, #0xc0
>> > cmpne   p1.b, p2/z, z1.b, #0
>> > neg z0.b, p1/m, z0.b
>> > st1bz0.b, p0, [x0, x2]
>> > add x2, x2, x4
>> > whilelo p0.b, w2, w3
>> > b.any   .L3
>> >
>> > While it seems to work for this test-case, I am not entirely sure if
>> > the patch is correct. Does it look in the right direction ?
>>
>> For binary ops we use match.pd rather than isel:
>>
>> (for uncond_op (UNCOND_BINARY)
>>  cond_op (COND_BINARY)
>>  (simplify
>>   (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
>>   (with { tree op_type = TREE_TYPE (@4); }
>>(if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), 
>> op_type)
>> && is_truth_type_for (op_type, TREE_TYPE (@0)))
>> (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))
>>  (simplify
>>   (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
>>   (with { tree op_type = TREE_TYPE (@4); }
>>(if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), 
>> op_type)
>> && is_truth_type_for (op_type, TREE_TYPE (@0)))
>> (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))
>>
>> I think it'd be good to do the same here, using new (UN)COND_UNARY
>> iterators.  (The iterators will only have one value to start with,
>> but other unary ops could get the same treatment in future.)
> Thanks for the suggestions.
> The attached patch adds a pattern to match.pd to replace:
> cond = a cmp b
> r = cond ? x : -x
> with:
> cond = a inverted_cmp b
> r = cond ? -x : x
>
> Code-gen with patch for inner loop:
> .L3:
> ld1bz0.b, p0/z, [x1, x2]
> movprfx z1, z0
> and z1.b, z1.b, #0xc0
> cmpne   p1.b, p2/z, z1.b, #0
> neg z0.b, p1/m, z0.b
> st1bz0.b, p0, [x0, x2]
> add x2, x2, x4
> whilelo p0.b, w2, w3
> b.any   .L3
>
> Does it look OK ?
> I didn't add it under (UN)COND_UNARY since it inverts the comparison,
> which we might not want to do for other unary ops ?

I think we should follow the structure of the current binary and
ternary patterns: cope with unary operations in either arm of the
vec_cond and use bit_not for the case in which the unary operation
is in the “false” arm of the vec_cond.

The bit_not will be folded away if the comparison can be inverted,
but it will be left in-place if the comparison can't be inverted
(as for some FP comparisons).

Thanks,
Richard

>
> Also, I am not sure, how to test if target supports conditional
> internal function ?
> I tried to use:
> (for cmp (tcc_comparison)
>  icmp (inverted_tcc_comparison)
>  (simplify
>   (vec_cond (cmp@2 @0 @1) @3 (negate @3))
>(with { auto op_type = TREE_TYPE (@2); }
> (if (vectorized_internal_fn_supported_p (IFN_COND_NEG, op_type)
>  && is_truth_type_for (op_type, TREE_TYPE (@0)))
>   (IFN_COND_NEG (icmp:op_type @0 @1) @3 @3)
>
> but both the conditions seem to fail.
>
> Thanks,
> Prathamesh
>
>
>>
>> Richard
>>
>>
>> >
>> > Thanks,
>> > Prathamesh
>> >
>> > diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
>> > index 38e90933c3e..5b0dd3c1993 100644
>> > --- a/

Re: [PATCH v4] attribs: Implement -Wno-attributes=vendor::attr [PR101940]

2021-10-11 Thread Marek Polacek via Gcc-patches

Ping.

On Tue, Sep 28, 2021 at 04:20:46PM -0400, Marek Polacek wrote:
> On Thu, Sep 23, 2021 at 02:25:16PM -0400, Jason Merrill wrote:
> > On 9/20/21 18:59, Marek Polacek via Gcc-patches wrote:
> > > +void
> > > +handle_ignored_attributes_option (vec *v)
> > > +{
> > > +  if (v == nullptr)
> > > +return;
> > > +
> > > +  for (auto opt : v)
> > > +{
> > > +  if (strcmp (opt, "clang") == 0)
> > > + {
> > > +   // TODO
> > > +   continue;
> > > + }
> > 
> > If this doesn't work yet, let's not accept it at all for now.
> 
> Ok.
>  
> > > +  char *q = strstr (opt, "::");
> > > +  /* We don't accept '::attr'.  */
> > > +  if (q == nullptr || q == opt)
> > > + {
> > > +   error ("wrong argument to ignored attributes");
> > > +   inform (input_location, "valid format is %, %, "
> > > +   "or %");
> > 
> > ...or even mention it.  Users can ignore clang:: instead, it doesn't matter
> > to us if clang attributes are misspelled.
> 
> Removed.
> 
> > > +   continue;
> > > + }
> > > +  /* Cut off the vendor part.  */
> > > +  *q = '\0';
> > > +  char *vendor = opt;
> > > +  char *attr = q + 2;
> > > +  /* Verify that they look valid.  */
> > > +  auto valid_p = [](const char *s) {
> > > + for (; *s != '\0'; ++s)
> > > +   if (!ISALNUM (*s) && *s != '_')
> > > + return false;
> > > + return true;
> > > +  };
> > > +  if (!valid_p (vendor) || !valid_p (attr))
> > > + {
> > > +   error ("wrong argument to ignored attributes");
> > > +   continue;
> > > + }
> > > +  /* Turn "__attr__" into "attr" so that we have a canonical form of
> > > +  attribute names.  Likewise for vendor.  */
> > > +  auto strip = [](char *&s) {
> > > + const size_t l = strlen (s);
> > > + if (l > 4 && s[0] == '_' && s[1] == '_'
> > > + && s[l - 1] == '_' && s[l - 2] == '_')
> > > +   {
> > > + s[l - 2] = '\0';
> > > + s += 2;
> > > +   }
> > > +  };
> > > +  strip (attr);
> > > +  strip (vendor);
> > > +  /* If we've already seen this vendor::attr, ignore it.  Attempting 
> > > to
> > > +  register it twice would lead to a crash.  */
> > > +  if (lookup_scoped_attribute_spec (get_identifier (vendor),
> > > + get_identifier (attr)))
> > > + continue;
> > > +  /* In the "vendor::" case, we should ignore *any* attribute coming
> > > +  from this attribute namespace.  */
> > > +  const bool ignored_ns = attr[0] == '\0';
> > 
> > Maybe set attr to nullptr instead of declaring ignored_ns?
> > 
> > > +  /* Create a table with extra attributes which we will register.
> > > +  We can't free it here, so squirrel away the pointers.  */
> > > +  attribute_spec *table = new attribute_spec[2];
> > > +  ignored_attributes_table.safe_push (table);
> > > +  table[0] = { ignored_ns ? nullptr : attr, 0, 0, false, false,
> > 
> > ...so this can just use attr.
> 
> I also need ignored_ns...
>  
> > > +false, false, nullptr, nullptr };
> > > +  table[1] = { nullptr, 0, 0, false, false, false, false, nullptr, 
> > > nullptr };
> > > +  register_scoped_attributes (table, vendor, ignored_ns);
> 
> ...here, but I tweaked this a bit to get rid of the bool.
> 
> > > +}
> > > +}
> > > +
> > > +/* Free data we might have allocated when adding extra attributes.  */
> > > +
> > > +void
> > > +free_attr_data ()
> > > +{
> > > +  for (auto x : ignored_attributes_table)
> > > +delete[] x;
> > > +}
> > 
> > You probably also want to zero out ignored_attributes_table at this point.
> 
> Done.
> 
> > >   /* Initialize attribute tables, and make some sanity checks if checking 
> > > is
> > >  enabled.  */
> > > @@ -252,6 +353,9 @@ init_attributes (void)
> > >   /* Put all the GNU attributes into the "gnu" namespace.  */
> > >   register_scoped_attributes (attribute_tables[i], "gnu");
> > > +  vec *ignored = (vec *) flag_ignored_attributes;
> > > +  handle_ignored_attributes_option (ignored);
> > > +
> > > invoke_plugin_callbacks (PLUGIN_ATTRIBUTES, NULL);
> > > attributes_initialized = true;
> > >   }
> > > @@ -456,6 +560,19 @@ diag_attr_exclusions (tree last_decl, tree node, 
> > > tree attrname,
> > > return found;
> > >   }
> > > +/* Return true iff we should not complain about unknown attributes
> > > +   coming from the attribute namespace NS.  This is the case for
> > > +   the -Wno-attributes=ns:: command-line option.  */
> > > +
> > > +static bool
> > > +attr_namespace_ignored_p (tree ns)
> > > +{
> > > +  if (ns == NULL_TREE)
> > > +return false;
> > > +  scoped_attributes *r = find_attribute_namespace (IDENTIFIER_POINTER 
> > > (ns));
> > > +  return r && r->ignored_p;
> > > +}
> > > +
> > >   /* Process the attributes listed in ATTRIBUTES and install them in 
> > > *NODE,
> > >  which is either a DECL (including a TYPE_DECL) or a TYPE.  If a DECL,
> > >  it should be modified in place; if a TYPE, a copy should be created
> > > @@ -556,7 +673,8 @@ decl_a

Re: [PATCH] attribs: Allow optional second arg for attr deprecated [PR102049]

2021-10-11 Thread Marek Polacek via Gcc-patches

Any thoughts?

On Thu, Sep 23, 2021 at 12:16:36PM -0400, Marek Polacek via Gcc-patches wrote:
> Clang implements something we don't have:
> 
> __attribute__((deprecated("message", "replacement")));
> 
> which seems pretty neat so I wrote this patch to add it to gcc.
> 
> It doesn't allow the optional second argument in the standard [[]]
> form so as not to clash with possible future standard additions.
> 
> I had hoped we could print a nice fix-it replacement hint, but that
> won't be possible until warn_deprecated_use gets something better than
> input_location.
> 
> Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> 
>   PR c++/102049
> 
> gcc/c-family/ChangeLog:
> 
>   * c-attribs.c (c_common_attribute_table): Increase max_len for
>   deprecated.
>   (handle_deprecated_attribute): Allow an optional second argument
>   in the GNU form of attribute deprecated.
> 
> gcc/c/ChangeLog:
> 
>   * c-parser.c (c_parser_std_attribute): Give a diagnostic when
>   the standard form of an attribute deprecated has a second argument.
> 
> gcc/ChangeLog:
> 
>   * doc/extend.texi: Document attribute deprecated with an
>   optional second argument.
>   * tree.c (warn_deprecated_use): Print the replacement argument,
>   if any.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/c2x-attr-deprecated-3.c: Adjust dg-error.
>   * c-c++-common/Wdeprecated-arg-1.c: New test.
> ---
>  gcc/c-family/c-attribs.c  | 17 -
>  gcc/c/c-parser.c  |  8 ++
>  gcc/doc/extend.texi   | 24 ++
>  .../c-c++-common/Wdeprecated-arg-1.c  | 21 
>  gcc/testsuite/gcc.dg/c2x-attr-deprecated-3.c  |  2 +-
>  gcc/tree.c| 25 +++
>  6 files changed, 90 insertions(+), 7 deletions(-)
>  create mode 100644 gcc/testsuite/c-c++-common/Wdeprecated-arg-1.c
> 
> diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c
> index 007b928c54b..ef857a9ae2c 100644
> --- a/gcc/c-family/c-attribs.c
> +++ b/gcc/c-family/c-attribs.c
> @@ -409,7 +409,7 @@ const struct attribute_spec c_common_attribute_table[] =
>   to prevent its usage in source code.  */
>{ "no vops",0, 0, true,  false, false, false,
> handle_novops_attribute, NULL },
> -  { "deprecated", 0, 1, false, false, false, false,
> +  { "deprecated", 0, 2, false, false, false, false,
> handle_deprecated_attribute, NULL },
>{ "unavailable",0, 1, false, false, false, false,
> handle_unavailable_attribute, NULL },
> @@ -4107,6 +4107,21 @@ handle_deprecated_attribute (tree *node, tree name,
>error ("deprecated message is not a string");
>*no_add_attrs = true;
>  }
> +  else if (TREE_CHAIN (args) != NULL_TREE)
> +{
> +  /* We allow an optional second argument in the GNU form of
> +  attribute deprecated, which specifies the replacement.  */
> +  if (flags & ATTR_FLAG_CXX11)
> + {
> +   error ("replacement argument only allowed in GNU attributes");
> +   *no_add_attrs = true;
> + }
> +  else if (TREE_CODE (TREE_VALUE (TREE_CHAIN (args))) != STRING_CST)
> + {
> +   error ("replacement argument is not a string");
> +   *no_add_attrs = true;
> + }
> +}
>  
>if (DECL_P (*node))
>  {
> diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
> index fa29d2c15fc..2b47f01d166 100644
> --- a/gcc/c/c-parser.c
> +++ b/gcc/c/c-parser.c
> @@ -4952,6 +4952,14 @@ c_parser_std_attribute (c_parser *parser, bool for_tm)
>   TREE_VALUE (attribute)
> = c_parser_attribute_arguments (parser, takes_identifier,
> require_string, false);
> + if (c_parser_next_token_is (parser, CPP_COMMA)
> + && strcmp (IDENTIFIER_POINTER (name), "deprecated") == 0)
> +   {
> + error_at (open_loc, "replacement argument only allowed in "
> +   "GNU attributes");
> + c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL);
> + return error_mark_node;
> +   }
>}
>  else
>c_parser_balanced_token_sequence (parser);
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 9501a60f20e..7d399f4b2bc 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -2860,6 +2860,7 @@ StrongAlias (allocate, alloc);
>  
>  @item deprecated
>  @itemx deprecated (@var{msg})
> +@itemx deprecated (@var{msg}, @var{replacement})
>  @cindex @code{deprecated} function attribute
>  The @code{deprecated} attribute results in a warning if the function
>  is used anywhere in the source file.  This is useful when identifying
> @@ -2880,6 +2881,13 @@ results in a warning on line 3 but not line 2.  The 
> optional @var{msg}
>  argument, which must be a string, is printed in the warning if

Re: [PATCH] rs6000/test: Adjust some cases due to O2 vect [PR102658]

2021-10-11 Thread Segher Boessenkool

Hi!

On Mon, Oct 11, 2021 at 10:47:00AM +0800, Kewen.Lin wrote:
> As PR102658 shows, commit r12-4240 enables vectorization at O2,
> some cases need to be adjusted accordingly for rs6000 port.
> 
> - For target specific test cases, this adds -fno-tree-vectorize
> to retain original test points, otherwise vectorization can
> make some expected scalar instructions gone or generate some
> unexpected instructions for vector construction.

Ah good choice.

> - For generic test cases, it follows the existing suggested
> practice with necessary target/xfail selector.

Not such a great choice.  Many of those tests do not make sense with
vectorisation enabled.  This should have been thought about, in some
cases resulting in not running the test with vectorisation enabled, and
in some cases duplicating the test, once with and once without
vectorisation.

But you are just following established practice, so :-)

> -  struct A1 a = { 0, { 1 } };   // { dg-warning "\\\[-Wstringop-overflow" "" 
> { target { i?86-*-* x86_64-*-* } } }
> +  struct A1 a = { 0, { 1 } };   // { dg-warning "\\\[-Wstringop-overflow" "" 
> { target { i?86-*-* x86_64-*-* powerpc*-*-* } } }

I don't know if powerpc*-*-* is the correct choice in all these cases.
Sometimes it might have to be powerpc*-*-linux* or similar.  We'll find
out :-)

(An xfail causes XPASS if the test does *not* fail).

> +/* Now O2 enables vectorization by default, which generates unexpected float
> +   conversion for vector construction, so simply disable it.  */

It is good to see these comments.  I love puzzles, but not in the
testsuite! :-)

Okay for trunk.  Thanks!

Segher

Re: [PATCH, rs6000] Disable gimple fold for float or double vec_minmax when fast-math is not set

2021-10-11 Thread Segher Boessenkool

Hi!

On Thu, Aug 26, 2021 at 09:19:30AM +0800, HAO CHEN GUI wrote:
> gcc/
> ?? * config/rs6000/rs6000-call.c (rs6000_gimple_fold_builtin):
> ?? Modify the VSX_BUILTIN_XVMINDP, ALTIVEC_BUILTIN_VMINFP,
> ?? VSX_BUILTIN_XVMAXDP, ALTIVEC_BUILTIN_VMAXFP expansions.

Something mangles your email, giving all those question marks.  It is
hard to review like this.

Don't send patches as replies in threads please.  It is impossible to
keep track of such things.

> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -12159,6 +12159,11 @@ rs6000_gimple_fold_builtin 
> (gimple_stmt_iterator *gsi)
>  return true;
>  /* flavors of vec_min.?? */
>  case VSX_BUILTIN_XVMINDP:
> +?? case ALTIVEC_BUILTIN_VMINFP:
> +?? if (!flag_finite_math_only || flag_signed_zeros)
> +?? return false;
> +?? /* Fall through to MIN_EXPR.?? */
> +?? gcc_fallthrough ();
>  case P8V_BUILTIN_VMINSD:
>  case P8V_BUILTIN_VMINUD:
>  case ALTIVEC_BUILTIN_VMINSB:

Yeah I would rather not review this like this :-)


Segher

Re: [RFC] ldist: Recognize rawmemchr loop patterns

2021-10-11 Thread Stefan Schulze Frielinghaus via Gcc-patches

On Fri, Sep 17, 2021 at 10:08:27AM +0200, Richard Biener wrote:
> On Mon, Sep 13, 2021 at 4:53 PM Stefan Schulze Frielinghaus
>  wrote:
> >
> > On Mon, Sep 06, 2021 at 11:56:21AM +0200, Richard Biener wrote:
> > > On Fri, Sep 3, 2021 at 10:01 AM Stefan Schulze Frielinghaus
> > >  wrote:
> > > >
> > > > On Fri, Aug 20, 2021 at 12:35:58PM +0200, Richard Biener wrote:
> > > > [...]
> > > > > > >
> > > > > > > +  /* Handle strlen like loops.  */
> > > > > > > +  if (store_dr == NULL
> > > > > > > +  && integer_zerop (pattern)
> > > > > > > +  && TREE_CODE (reduction_iv.base) == INTEGER_CST
> > > > > > > +  && TREE_CODE (reduction_iv.step) == INTEGER_CST
> > > > > > > +  && integer_onep (reduction_iv.step)
> > > > > > > +  && (types_compatible_p (TREE_TYPE (reduction_var), 
> > > > > > > size_type_node)
> > > > > > > + || TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (reduction_var
> > > > > > > +{
> > > > > > >
> > > > > > > I wonder what goes wrong with a larger or smaller wrapping IV 
> > > > > > > type?
> > > > > > > The iteration
> > > > > > > only stops when you load a NUL and the increments just wrap along 
> > > > > > > (you're
> > > > > > > using the pointer IVs to compute the strlen result).  Can't you 
> > > > > > > simply truncate?
> > > > > >
> > > > > > I think truncation is enough as long as no overflow occurs in 
> > > > > > strlen or
> > > > > > strlen_using_rawmemchr.
> > > > > >
> > > > > > > For larger than size_type_node (actually larger than 
> > > > > > > ptr_type_node would matter
> > > > > > > I guess), the argument is that since pointer wrapping would be 
> > > > > > > undefined anyway
> > > > > > > the IV cannot wrap either.  Now, the correct check here would 
> > > > > > > IMHO be
> > > > > > >
> > > > > > >   TYPE_PRECISION (TREE_TYPE (reduction_var)) < TYPE_PRECISION
> > > > > > > (ptr_type_node)
> > > > > > >|| TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (pointer-iv-var))
> > > > > > >
> > > > > > > ?
> > > > > >
> > > > > > Regarding the implementation which makes use of rawmemchr:
> > > > > >
> > > > > > We can count at most PTRDIFF_MAX many bytes without an overflow.  
> > > > > > Thus,
> > > > > > the maximal length we can determine of a string where each 
> > > > > > character has
> > > > > > size S is PTRDIFF_MAX / S without an overflow.  Since an overflow 
> > > > > > for
> > > > > > ptrdiff type is undefined we have to make sure that if an overflow
> > > > > > occurs, then an overflow occurs for reduction variable, too, and 
> > > > > > that
> > > > > > this is undefined, too.  However, I'm not sure anymore whether we 
> > > > > > want
> > > > > > to respect overflows in all cases.  If TYPE_PRECISION 
> > > > > > (ptr_type_node)
> > > > > > equals TYPE_PRECISION (ptrdiff_type_node) and an overflow occurs, 
> > > > > > then
> > > > > > this would mean that a single string consumes more than half of the
> > > > > > virtual addressable memory.  At least for architectures where
> > > > > > TYPE_PRECISION (ptrdiff_type_node) == 64 holds, I think it is 
> > > > > > reasonable
> > > > > > to neglect the case where computing pointer difference may overflow.
> > > > > > Otherwise we are talking about strings with lengths of multiple
> > > > > > pebibytes.  For other architectures we might have to be more precise
> > > > > > and make sure that reduction variable overflows first and that this 
> > > > > > is
> > > > > > undefined.
> > > > > >
> > > > > > Thus a conservative condition would be (I assumed that the size of 
> > > > > > any
> > > > > > integral type is a power of two which I'm not sure if this really 
> > > > > > holds;
> > > > > > IIRC the C standard requires only that the alignment is a power of 
> > > > > > two
> > > > > > but not necessarily the size so I might need to change this):
> > > > > >
> > > > > > /* Compute precision (reduction_var) < (precision (ptrdiff_type) - 
> > > > > > 1 - log2 (sizeof (load_type))
> > > > > >or in other words return true if reduction variable overflows 
> > > > > > first
> > > > > >and false otherwise.  */
> > > > > >
> > > > > > static bool
> > > > > > reduction_var_overflows_first (tree reduction_var, tree load_type)
> > > > > > {
> > > > > >   unsigned precision_ptrdiff = TYPE_PRECISION (ptrdiff_type_node);
> > > > > >   unsigned precision_reduction_var = TYPE_PRECISION (TREE_TYPE 
> > > > > > (reduction_var));
> > > > > >   unsigned size_exponent = wi::exact_log2 (wi::to_wide 
> > > > > > (TYPE_SIZE_UNIT (load_type)));
> > > > > >   return wi::ltu_p (precision_reduction_var, precision_ptrdiff - 1 
> > > > > > - size_exponent);
> > > > > > }
> > > > > >
> > > > > > TYPE_PRECISION (ptrdiff_type_node) == 64
> > > > > > || (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (reduction_var))
> > > > > > && reduction_var_overflows_first (reduction_var, load_type)
> > > > > >
> > > > > > Regarding the implementation which makes use of strlen:
> > > > > >
> > > > > > I'm not sure what it means if strlen is called

Re: [PATCH, rs6000] Disable gimple fold for float or double vec_minmax when fast-math is not set

2021-10-11 Thread Segher Boessenkool

On Mon, Oct 11, 2021 at 10:55:36AM -0500, Segher Boessenkool wrote:
> On Thu, Aug 26, 2021 at 09:19:30AM +0800, HAO CHEN GUI wrote:
> > gcc/
> > ?? * config/rs6000/rs6000-call.c (rs6000_gimple_fold_builtin):
> > ?? Modify the VSX_BUILTIN_XVMINDP, ALTIVEC_BUILTIN_VMINFP,
> > ?? VSX_BUILTIN_XVMAXDP, ALTIVEC_BUILTIN_VMAXFP expansions.
> 
> Something mangles your email, giving all those question marks.  It is
> hard to review like this.

These were non-breaking spaces (u+00a0).  Probably caused by
format=flowed, the great destroyer of patches.


Segher

Re: PING [PATCH] doc: improve -fsanitize=undefined description

2021-10-11 Thread Richard Sandiford via Gcc-patches

Diane Meirowitz via Gcc-patches  writes:
> Please review my patch. It is tiny. Thank you.

Thanks for the patch and sorry for the very slow response.
I've now pushed this to master and all active branches.

Thanks,
Richard

> Diane
>
> On 9/15/21, 5:02 PM, "Diane Meirowitz"  wrote:
>
>
> doc: improve -fsanitize=undefined description
>
> gcc/ChangeLog:
> * doc/invoke.texi: add link to UndefinedBehaviorSanitizer 
> documentation,
> mention UBSAN_OPTIONS, similar to what is done for 
> AddressSanitizer.
>
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 78cfc100ac2..f022885edf8 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -15200,7 +15200,8 @@ The option cannot be combined with 
> @option{-fsanitize=thread}.
> @opindex fsanitize=undefined
> Enable UndefinedBehaviorSanitizer, a fast undefined behavior detector.
> Various computations are instrumented to detect undefined behavior
> -at runtime.  Current suboptions are:
> +at runtime.  See 
> @uref{https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html} for more 
> details.   The run-time behavior can be influenced using the
> +@env{UBSAN_OPTIONS} environment variable.  Current suboptions are:
>
> @table @gcctabopt

Re: [PATCH] rs6000/test: Adjust some cases due to O2 vect [PR102658]

2021-10-11 Thread Martin Sebor via Gcc-patches


On 10/11/21 9:30 AM, Segher Boessenkool wrote:

Hi!

On Mon, Oct 11, 2021 at 10:47:00AM +0800, Kewen.Lin wrote:

As PR102658 shows, commit r12-4240 enables vectorization at O2,
some cases need to be adjusted accordingly for rs6000 port.

- For target specific test cases, this adds -fno-tree-vectorize
to retain original test points, otherwise vectorization can
make some expected scalar instructions gone or generate some
unexpected instructions for vector construction.


Ah good choice.


- For generic test cases, it follows the existing suggested
practice with necessary target/xfail selector.


Not such a great choice.  Many of those tests do not make sense with
vectorisation enabled.  This should have been thought about, in some
cases resulting in not running the test with vectorisation enabled, and
in some cases duplicating the test, once with and once without
vectorisation.


The tests detect bugs that are present both with and without
vectorization, so they should pass both ways.  That they don't
tells us that the warnings need work (they were written with
an assumption that doesn't hold anymore).  We need to track that
work somehow, but simply xfailing them without making a record
of what underlying problem the xfails correspond to isn't the best
way.  In my experience, what works well is opening a bug for each
distinct limitation (if one doesn't already exist) and adding
a reference to it as a comment to the xfail.



But you are just following established practice, so :-)


-  struct A1 a = { 0, { 1 } };   // { dg-warning "\\\[-Wstringop-overflow" "" { 
target { i?86-*-* x86_64-*-* } } }
+  struct A1 a = { 0, { 1 } };   // { dg-warning "\\\[-Wstringop-overflow" "" { 
target { i?86-*-* x86_64-*-* powerpc*-*-* } } }


As I mentioned in the bug, when adding xfails for regressions
please be sure to reference the bug that tracks the underlying
root cause.  There may be multiple problems, and we need to
identify what it is in each instance.  As the author of
the tests I can help with that but not if I'm not in the loop
on these changes (it would seem prudent to get the author's
thoughts on such sweeping changes to their work).

I discussed one of these failures with Hongtao in detail at
the time autovectorization was being enabled and made the same
request then but I didn't realize the problem was so pervasive.

In addition, the target-specific conditionals in the xfails are
going to be difficult to maintain.  It might be okay for one or
two in a single test but for so many we need a better solution
than that.  If autovectorization is only enabled for a subset
of targets then a solution might be to add a new DejaGnu test
for it and conditionalize the xfails on it.

Martin



I don't know if powerpc*-*-* is the correct choice in all these cases.
Sometimes it might have to be powerpc*-*-linux* or similar.  We'll find
out :-)

(An xfail causes XPASS if the test does *not* fail).


+/* Now O2 enables vectorization by default, which generates unexpected float
+   conversion for vector construction, so simply disable it.  */


It is good to see these comments.  I love puzzles, but not in the
testsuite! :-)

Okay for trunk.  Thanks!


Segher

Re: [PATCH 1/5]AArch64 sve: combine inverted masks into NOTs

2021-10-11 Thread Richard Sandiford via Gcc-patches

Tamar Christina  writes:
> Hi,
>
> Sending a new version of the patch because I noticed the pattern was 
> overriding the nor pattern.
>
> A second pattern is needed to capture the nor case as combine will match the
> longest sequence first.  So without this pattern we end up de-optimizing nor
> and instead emit two nots.  I did not find a better way to do this.

Hmm, that's unfortunate.  But yeah, I don't know of a better way
of avoiding it either.

There's a risk we might need a pattern with the operands swapped
as well (so that the (not (reg …)) comes first) but it would be better
to avoid that using a new canonicalisation rule if necessary.

> Note: This patch series is working incrementally towards generating the most
>   efficient code for this and other loops in small steps.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64-sve.md (*fcm_bic_combine,
> *fcm_nor_combine, *fcmuo_bic_combine,
> *fcmuo_nor_combine): New.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/sve/pred-not-gen.c-1: New test.
> * gcc.target/aarch64/sve/pred-not-gen.c-2: New test.
> * gcc.target/aarch64/sve/pred-not-gen.c-3: New test.
> * gcc.target/aarch64/sve/pred-not-gen.c-4: New test.

OK, thanks.

Richard

>
> --- inline copy of patch ---
>
> diff --git a/gcc/config/aarch64/aarch64-sve.md 
> b/gcc/config/aarch64/aarch64-sve.md
> index 
> 359fe0e457096cf4042a774789a5c241420703d3..8fe4c721313e70592d2cf0acbfbe2f07b070b51a
>  100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -8126,6 +8126,160 @@ (define_insn_and_split "*fcmuo_and_combine"
>   UNSPEC_COND_FCMUO))]
>  )
>
> +;; Similar to *fcm_and_combine, but for BIC rather than AND.
> +;; In this case, we still need a separate NOT/BIC operation, but predicating
> +;; the comparison on the BIC operand removes the need for a PTRUE.
> +(define_insn_and_split "*fcm_bic_combine"
> +  [(set (match_operand: 0 "register_operand" "=Upa")
> +   (and:
> + (and:
> +   (not:
> + (unspec:
> +   [(match_operand: 1)
> +(const_int SVE_KNOWN_PTRUE)
> +(match_operand:SVE_FULL_F 2 "register_operand" "w")
> +(match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" 
> "wDz")]
> +   SVE_COND_FP_CMP_I0))
> +   (match_operand: 4 "register_operand" "Upa"))
> + (match_dup: 1)))
> +   (clobber (match_scratch: 5 "=&Upl"))]
> +  "TARGET_SVE"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 5)
> +   (unspec:
> + [(match_dup 4)
> +  (const_int SVE_MAYBE_NOT_PTRUE)
> +  (match_dup 2)
> +  (match_dup 3)]
> + SVE_COND_FP_CMP_I0))
> +   (set (match_dup 0)
> +   (and:
> + (not:
> +   (match_dup 5))
> + (match_dup 4)))]
> +{
> +  if (can_create_pseudo_p ())
> +operands[5] = gen_reg_rtx (mode);
> +}
> +)
> +
> +;; Make sure that we expand to a nor when the operand 4 of
> +;; *fcm_bic_combine is a not.
> +(define_insn_and_split "*fcm_nor_combine"
> +  [(set (match_operand: 0 "register_operand" "=Upa")
> +   (and:
> + (and:
> +   (not:
> + (unspec:
> +   [(match_operand: 1)
> +(const_int SVE_KNOWN_PTRUE)
> +(match_operand:SVE_FULL_F 2 "register_operand" "w")
> +(match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" 
> "wDz")]
> +   SVE_COND_FP_CMP_I0))
> +   (not:
> + (match_operand: 4 "register_operand" "Upa")))
> + (match_dup: 1)))
> +   (clobber (match_scratch: 5 "=&Upl"))]
> +  "TARGET_SVE"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 5)
> +   (unspec:
> + [(match_dup 1)
> +  (const_int SVE_KNOWN_PTRUE)
> +  (match_dup 2)
> +  (match_dup 3)]
> + SVE_COND_FP_CMP_I0))
> +   (set (match_dup 0)
> +   (and:
> + (and:
> +   (not:
> + (match_dup 5))
> +   (not:
> + (match_dup 4)))
> + (match_dup 1)))]
> +{
> +  if (can_create_pseudo_p ())
> +operands[5] = gen_reg_rtx (mode);
> +}
> +)
> +
> +(define_insn_and_split "*fcmuo_bic_combine"
> +  [(set (match_operand: 0 "register_operand" "=Upa")
> +   (and:
> + (and:
> +   (not:
> + (unspec:
> +   [(match_operand: 1)
> +(const_int SVE_KNOWN_PTRUE)
> +(match_operand:SVE_FULL_F 2 "register_operand" "w")
> +(match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" 
> "wDz")]
> +   UNSPEC_COND_FCMUO))
> +   (match_operand: 4 "register_operand" "Upa"))
> + (match_dup: 1)))
> +   (clobber (match_scratch: 5 "=&Upl"))]
> +  "TARGET_SVE"
> +  "#"
> +  "&& 1"
> +  [(set (match_dup 5)
> +   (unspec:
> + [(match_dup 4)
> +

Re: PING [PATCH] doc: improve -fsanitize=undefined description

2021-10-11 Thread Diane Meirowitz via Gcc-patches

Richard,

Thank you!

Diane

> On Oct 11, 2021, at 12:15 PM, Richard Sandiford  
> wrote:
> 
> Diane Meirowitz via Gcc-patches  writes:
>> Please review my patch. It is tiny. Thank you.
> 
> Thanks for the patch and sorry for the very slow response.
> I've now pushed this to master and all active branches.
> 
> Thanks,
> Richard
> 
>> Diane
>> 
>> On 9/15/21, 5:02 PM, "Diane Meirowitz"  wrote:
>> 
>> 
>>doc: improve -fsanitize=undefined description
>> 
>>gcc/ChangeLog:
>>* doc/invoke.texi: add link to UndefinedBehaviorSanitizer 
>> documentation,
>>mention UBSAN_OPTIONS, similar to what is done for 
>> AddressSanitizer.
>> 
>>diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
>>index 78cfc100ac2..f022885edf8 100644
>>--- a/gcc/doc/invoke.texi
>>+++ b/gcc/doc/invoke.texi
>>@@ -15200,7 +15200,8 @@ The option cannot be combined with 
>> @option{-fsanitize=thread}.
>>@opindex fsanitize=undefined
>>Enable UndefinedBehaviorSanitizer, a fast undefined behavior detector.
>>Various computations are instrumented to detect undefined behavior
>>-at runtime.  Current suboptions are:
>>+at runtime.  See 
>> @uref{https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html} for more 
>> details.   The run-time behavior can be influenced using the
>>+@env{UBSAN_OPTIONS} environment variable.  Current suboptions are:
>> 
>>@table @gcctabopt

Re: [PATCH v3 1/6] rs6000: Support SSE4.1 "round" intrinsics

2021-10-11 Thread Segher Boessenkool

On Mon, Oct 11, 2021 at 08:46:17AM -0500, Paul A. Clarke wrote:
> On Fri, Oct 08, 2021 at 05:31:11PM -0500, Segher Boessenkool wrote:
> > "asm volatile" means there is a side effect in the asm.  This means that
> > it has to be executed on the real machine the same as on the abstract
> > machine, with the side effects in the same order.
> > 
> > It can still be reordered, modulo those restrictions.  It can be merged
> > with an identical asm as well.  And the compiler can split this into two
> > identical asms on two paths.
> 
> It seems odd to me that the compiler can make any assumptions about
> the side-effect(s). How does it know that a side-effect does not alter
> computation (as it indeed does in this case), such that reordering is
> still correct (which it wouldn't be in this case)?

Because by definition side effects do not change the computation (where
"computation" means "the outputs of the asm")!

And if you are talking about changing future computations, as floating
point control flags can be used for: this falls outside of the C abstract
machine, other than fe[gs]etround etc.

> > > With respect to volatile, I worry about removing it, because I do
> > > indeed need that instruction to execute in order to clear the FPSCR
> > > exception enable bits. That side-effect is not otherwise known to the
> > > compiler.
> > 
> > Yes.  But as said above, volatile isn't enough to get this to behave
> > correctly.
> > 
> > The easiest way out is to write this all in one piece of (inline) asm.
> 
> Ugh. I really don't want to go there, not just because it's work, but
> I think this is a paradigm that should work without needing to drop
> fully into asm.

Yes.  Let's say GCC still has some challenges here :-(

> Is there something unique about using an "asm" statement versus using,
> say, a builtin like __builtin_mtfsf or a hypothetical __builtin_mffsce?

Nope.

> Very similar methods are used in glibc today. Are those broken?

Maybe.  If you get a real (i.e. not inline) function call there, that
can save you often.

> Would creating a __builtin_mffsce be another solution?

Yes.  And not a bad idea in the first place.

> Would adding memory barriers between the FPSCR manipulations and the
> code which is bracketed by them be sufficient?

No, what you want to order is not memory accesses, but FP computations
relative to the insns that change the FP control bits.  If *both* of
those change memory you can artificially order them with that.  But most
FP computations do not access memory.


Segher

Re: [PATCH 2/5]AArch64 sve: combine nested if predicates

2021-10-11 Thread Richard Sandiford via Gcc-patches

Tamar Christina  writes:
>> > Note: This patch series is working incrementally towards generating the
>> most
>> >   efficient code for this and other loops in small steps.
>> 
>> It looks like this could be done in the vectoriser via an extension of the
>> scalar_cond_masked_set mechanism.  We have:
>> 
>>   mask__54.13_59 = vect_a_15.9_55 > vect_b_17.12_58;
>>   vec_mask_and_60 = loop_mask_32 & mask__54.13_59;
>>   …
>>   mask__30.17_67 = vect_a_15.9_55 > vect_cst__66;
>>   mask__29.18_68 = mask__54.13_59 & mask__30.17_67;
>>   vec_mask_and_69 = loop_mask_32 & mask__29.18_68;
>> 
>> When vectorising mask__29.18_68, we could test whether each side of the
>> "&" is already in scalar_cond_masked_set and AND in the loop mask if so, like
>> we do in vectorizable_condition.  We could then separately record that the &
>> result includes the loop mask.
>
> Whenever a mask is being generated from a BIT_AND we mask the operands of
> the and instead and then just AND the result.
>
> This allows us to be able to CSE the masks and generate the right combination.
> However because re-assoc will try to re-order the masks in the & we have to 
> now
> perform a small local CSE on the vectorized loop if vectorization is 
> successful.
>
> Note: This patch series is working incrementally towards generating the most
>   efficient code for this and other loops in small steps.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-linux-gnu and no 
> issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>   * tree-vectorizer.c (vectorize_loops): Do local CSE through RPVN upon
>   successful vectorization.
>   * tree-vect-stmts.c (prepare_load_store_mask): When combining two masks
>   mask the operands instead of the combined operation.
>
> gcc/testsuite/ChangeLog:
>
>   * gcc.target/aarch64/sve/pred-combine-and.c: New test.
>
> --- inline copy of patch ---
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c
> new file mode 100644
> index 
> ..d395b7f84bb15b588493611df5a47549726ac24a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c
> @@ -0,0 +1,18 @@
> +/* { dg-do assemble { target aarch64_asm_sve_ok } } */
> +/* { dg-options "-O3 --save-temps" } */
> +
> +void f5(float * restrict z0, float * restrict z1, float *restrict x, float * 
> restrict y, float c, int n)
> +{
> +for (int i = 0; i < n; i++) {
> +float a = x[i];
> +float b = y[i];
> +if (a > b) {
> +z0[i] = a + b;
> +if (a > c) {
> +z1[i] = a - b;
> +}
> +}
> +}
> +}
> +
> +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-9]+/z, 
> z[0-9]+\.s, z[0-9]+\.s} 2 } } */
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 
> 4e0b2adf1dc2404bc345af30cfeb9c819084894e..717a25f46aa72534eebeb382c92b9145d7d44d04
>  100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -1799,6 +1799,19 @@ prepare_load_store_mask (tree mask_type, tree 
> loop_mask, tree vec_mask,
>  return vec_mask;
>  
>gcc_assert (TREE_TYPE (loop_mask) == mask_type);
> +
> +  /* Check if the mask is a combination of two different masks.  */
> +  gimple *def_stmt = SSA_NAME_DEF_STMT (vec_mask);
> +  if (is_gimple_assign (def_stmt)
> +  && gimple_assign_rhs_code (def_stmt) == BIT_AND_EXPR)
> +{
> +  tree lhs1 = gimple_assign_rhs1 (def_stmt);
> +  tree lhs2 = gimple_assign_rhs2 (def_stmt);
> +
> +  vec_mask = prepare_load_store_mask (mask_type, loop_mask, lhs1, gsi);
> +  loop_mask = prepare_load_store_mask (mask_type, loop_mask, lhs2, gsi);
> +}
> +

I think this is doing something different from what I suggested above.

I was thinking that we should do this when vectorising the AND itself
(mask__29.18_68 in the example above), using scalar_cond_masked_set
to check whether either side is or is going to be ANDed with the
loop mask.

That way we never generate more loop masks than we need to,
whereas the above version could.

Thanks,
Richard

[PATCH] libstdc++: Check [ptr,end) and [ptr,ptr+n) ranges with _GLIBCXX_ASSERTIONS

2021-10-11 Thread Jonathan Wakely via Gcc-patches

This enables lightweight checks for the __glibcxx_requires_valid_range
and __glibcxx_requires_string_len macros  when _GLIBCXX_ASSERTIONS is
defined.  By using __builtin_object_size we can check whether the end of
the range is part of the same object as the start of the range, and
detect problems like in PR 89927.

libstdc++-v3/ChangeLog:

* include/debug/debug.h (__valid_range_p, __valid_range_n): New
inline functions using __builtin_object_size to check ranges
delimited by pointers.
[_GLIBCXX_ASSERTIONS] (__glibcxx_requires_valid_range): Use
__valid_range_p.
[_GLIBCXX_ASSERTIONS] (__glibcxx_requires_string_len): Use
__valid_range_n.


The first patch allows us to detect bugs like string("foo", "bar"),
like in PR 89927. Debug mode cannot currently detect this. The new
check uses the compiler built-in to detect when the two arguments are
not part of the same object. This assumes we're optimizing and the
compiler knows the values of the pointers. If it doesn't, then the
function just returns true and should inline to nothing.

I would like to also enable that for Debug Mode, otherwise we have
checks that work for _GLIBCXX_ASSERTIONS but not for _GLIBCXX_DEBUG. I
tried to make that work with the second patch attached to this mail,
but it doesn't abort for the example in PR 89927. I think putting the
checks inside the "real" debug checking functions is too many levels
of inlining and the compiler "forgets" the pointer values.

I think the first patch is worth committing. It should add no overhead
for optimized builds, and diagnoses some bugs that we do not diagnose
today. I'm less sure about the second, since it doesn't actually help.
Maybe the second one should wait for Siddhesh's
__builtin_dynamic_object_size to land on trunk.

Taking this idea further, we could do something similar for
__glibcxx_requires_string, which is currently almost useless (it only
checks if the pointer is null) but could be changed to use
__valid_range_n(_String, char_traits<...>::length(_String))
so that we can diagnose strings that are not null-terminated (because
the length that char_traits would give us would be larger than the size
that __builtin_object_size would give us).

Thoughts?


commit b008cc08c6b05e32c896ed6e5a3e289ccf8f3c91
Author: Jonathan Wakely 
Date:   Mon Oct 11 15:58:43 2021

libstdc++: Check [ptr,end) and [ptr,ptr+n) ranges with _GLIBCXX_ASSERTIONS

This enables lightweight checks for the __glibcxx_requires_valid_range
and __glibcxx_requires_string_len macros when _GLIBCXX_ASSERTIONS is
defined.  By using __builtin_object_size we can check whether the end of
the range is part of the same object as the start of the range, and
detect problems like in PR 89927.

libstdc++-v3/ChangeLog:

* include/debug/debug.h (__valid_range_p, __valid_range_n): New
inline functions using __builtin_object_size to check ranges
delimited by pointers.
[_GLIBCXX_ASSERTIONS] (__glibcxx_requires_valid_range): Use
__valid_range_p.
[_GLIBCXX_ASSERTIONS] (__glibcxx_requires_string_len): Use
__valid_range_n.

diff --git a/libstdc++-v3/include/debug/debug.h b/libstdc++-v3/include/debug/debug.h
index 116f2f023e2..1db5aa34c55 100644
--- a/libstdc++-v3/include/debug/debug.h
+++ b/libstdc++-v3/include/debug/debug.h
@@ -59,12 +59,46 @@ namespace __gnu_debug
 
   template
 struct _Safe_iterator;
+
+#ifdef _GLIBCXX_ASSERTIONS
+  template<typename _Tp>
+__attribute__((__always_inline__))
+_GLIBCXX14_CONSTEXPR inline bool
+__valid_range_p(_Tp* __first, _Tp* __last) _GLIBCXX_NOEXCEPT
+{
+  __UINTPTR_TYPE__ __f = (__UINTPTR_TYPE__)__first;
+  __UINTPTR_TYPE__ __l = (__UINTPTR_TYPE__)__last;
+  if (const std::size_t __sz = __builtin_object_size(__first, 3))
+   return __f <= __l && (__l - __f) <= __sz;
+  return true;
+}
+
+#ifndef _GLIBCXX_DEBUG
+  // __glibcxx_requires_valid_range uses this overload for non-pointers.
+  template<typename _Tp>
+__attribute__((__always_inline__))
+_GLIBCXX14_CONSTEXPR inline bool
+__valid_range_p(_Tp, _Tp) _GLIBCXX_NOEXCEPT
+{ return true; }
+#endif
+
+  template<typename _Tp>
+_GLIBCXX14_CONSTEXPR __attribute__((__always_inline__))
+inline bool
+__valid_range_n(_Tp* __first, std::size_t __n) _GLIBCXX_NOEXCEPT
+{
+  if (const std::size_t __sz = __builtin_object_size(__first, 3))
+   return __n <= __sz;
+  return true;
+}
+#endif
 }
 
 #ifndef _GLIBCXX_DEBUG
 
 # define __glibcxx_requires_cond(_Cond,_Msg)
-# define __glibcxx_requires_valid_range(_First,_Last)
+# define __glibcxx_requires_valid_range(_First,_Last) \
+  __glibcxx_assert(__gnu_debug::__valid_range_p(_First, _Last))
 # define __glibcxx_requires_can_increment(_First,_Size)
 # define __glibcxx_requires_can_increment_range(_First1,_Last1,_First2)
 # define __glibcxx_requires_can_decrement_range(_First1,_Last1,_First2)
@@ -79,7 +113,8 @@ namesp

1 2 >

1 - 100 of 137 matches

Mail list logo