Re: [PATCH] X86: Add an option -muse-unaligned-vector-move

2021-10-20 Thread Richard Biener via Gcc-patches
On Wed, Oct 20, 2021 at 7:31 AM dianhong.xu--- via Gcc-patches
 wrote:
>
> From: dianhong xu 
>
> Add -muse-unaligned-vector-move option to emit unaligned vector move
> instructions.

Why would you ever want to have such option?!  Should the documentation
at least read "emit unaligned vector moves even for aligned storage or when
using aligned move intrinsics"?

Richard.

> gcc/ChangeLog:
>
> * config/i386/i386-options.c (ix86_target_string): Add
> -muse-unaligned-vector-move.
> * config/i386/i386.c (ix86_get_ssemov): Emit unaligned vector if use
> the new option.
> * config/i386/i386.opt (muse-unaligned-vector-move): New.
> * config/i386/sse.md: Emit unaligned vector if use this new option
> * doc/invoke.texi: Document -muse-unaligned-vector-move
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/avx2-vector-unaligned-load-store-1.c: New test.
> * gcc.target/i386/avx2-vector-unaligned-load-store-2.c: New test.
> * gcc.target/i386/avx2-vector-unaligned-load-store-3.c: New test.
> * gcc.target/i386/avx512vl-vector-unaligned-load-store-1.c: New test.
> ---
>  gcc/config/i386/i386-options.c|   3 +-
>  gcc/config/i386/i386.c|  41 +++
>  gcc/config/i386/i386.opt  |   4 +
>  gcc/config/i386/sse.md|  30 +++--
>  gcc/doc/invoke.texi   |   7 ++
>  .../i386/avx2-vector-unaligned-load-store-1.c | 102 +
>  .../i386/avx2-vector-unaligned-load-store-2.c | 107 ++
>  .../i386/avx2-vector-unaligned-load-store-3.c |  11 ++
>  .../avx512vl-vector-unaligned-load-store-1.c  |  13 +++
>  9 files changed, 287 insertions(+), 31 deletions(-)
>  create mode 100644 
> gcc/testsuite/gcc.target/i386/avx2-vector-unaligned-load-store-1.c
>  create mode 100644 
> gcc/testsuite/gcc.target/i386/avx2-vector-unaligned-load-store-2.c
>  create mode 100644 
> gcc/testsuite/gcc.target/i386/avx2-vector-unaligned-load-store-3.c
>  create mode 100644 
> gcc/testsuite/gcc.target/i386/avx512vl-vector-unaligned-load-store-1.c
>
> diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c
> index c9523b26f49..eacbd0f5451 100644
> --- a/gcc/config/i386/i386-options.c
> +++ b/gcc/config/i386/i386-options.c
> @@ -397,7 +397,8 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
>  { "-mstv", MASK_STV },
>  { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD },
>  { "-mavx256-split-unaligned-store",
> MASK_AVX256_SPLIT_UNALIGNED_STORE },
> -{ "-mcall-ms2sysv-xlogues",MASK_CALL_MS2SYSV_XLOGUES }
> +{ "-mcall-ms2sysv-xlogues",MASK_CALL_MS2SYSV_XLOGUES },
> +{ "-muse-unaligned-vector-move",   MASK_USE_UNALIGNED_VECTOR_MOVE }
>};
>
>/* Additional flag options.  */
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index f111411e599..7581e854021 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -5323,8 +5323,9 @@ ix86_get_ssemov (rtx *operands, unsigned size,
>  enum attr_mode insn_mode, machine_mode mode)
>  {
>char buf[128];
> -  bool misaligned_p = (misaligned_operand (operands[0], mode)
> -  || misaligned_operand (operands[1], mode));
> +  bool need_unaligned_p = (TARGET_USE_UNALIGNED_VECTOR_MOVE
> +  || misaligned_operand (operands[0], mode)
> +  || misaligned_operand (operands[1], mode));
>bool evex_reg_p = (size == 64
>  || EXT_REX_SSE_REG_P (operands[0])
>  || EXT_REX_SSE_REG_P (operands[1]));
> @@ -5380,17 +5381,17 @@ ix86_get_ssemov (rtx *operands, unsigned size,
> {
> case opcode_int:
>   if (scalar_mode == E_HFmode)
> -   opcode = (misaligned_p
> +   opcode = (need_unaligned_p
>   ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
>   : "vmovdqa64");
>   else
> -   opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
> +   opcode = need_unaligned_p ? "vmovdqu32" : "vmovdqa32";
>   break;
> case opcode_float:
> - opcode = misaligned_p ? "vmovups" : "vmovaps";
> + opcode = need_unaligned_p ? "vmovups" : "vmovaps";
>   break;
> case opcode_double:
> - opcode = misaligned_p ? "vmovupd" : "vmovapd";
> + opcode = need_unaligned_p ? "vmovupd" : "vmovapd";
>   break;
> }
>  }
> @@ -5399,21 +5400,21 @@ ix86_get_ssemov (rtx *operands, unsigned size,
>switch (scalar_mode)
> {
> case E_HFmode:
> - opcode = (misaligned_p
> + opcode = (need_unaligned_p
> ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
> : "vmovdqa64");
>   break;
> case E_SFmode:
> -   

Re: [PATCH] X86: Add an option -muse-unaligned-vector-move

2021-10-20 Thread Richard Biener via Gcc-patches
On Wed, Oct 20, 2021 at 9:02 AM Richard Biener
 wrote:
>
> On Wed, Oct 20, 2021 at 7:31 AM dianhong.xu--- via Gcc-patches
>  wrote:
> >
> > From: dianhong xu 
> >
> > Add -muse-unaligned-vector-move option to emit unaligned vector move
> > instructions.
>
> Why would you ever want to have such option?!  Should the documentation
> at least read "emit unaligned vector moves even for aligned storage or when
> using aligned move intrinsics"?

And does it even work?  I fail to see adjustments to memory operands of
SSE/AVX instructions that have to be aligned and now would need to be
pushed to separate unaligned moves with an extra register?

Richard.

>
> Richard.
>
> > gcc/ChangeLog:
> >
> > * config/i386/i386-options.c (ix86_target_string): Add
> > -muse-unaligned-vector-move.
> > * config/i386/i386.c (ix86_get_ssemov): Emit unaligned vector if use
> > the new option.
> > * config/i386/i386.opt (muse-unaligned-vector-move): New.
> > * config/i386/sse.md: Emit unaligned vector if use this new option
> > * doc/invoke.texi: Document -muse-unaligned-vector-move
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/i386/avx2-vector-unaligned-load-store-1.c: New test.
> > * gcc.target/i386/avx2-vector-unaligned-load-store-2.c: New test.
> > * gcc.target/i386/avx2-vector-unaligned-load-store-3.c: New test.
> > * gcc.target/i386/avx512vl-vector-unaligned-load-store-1.c: New 
> > test.
> > ---
> >  gcc/config/i386/i386-options.c|   3 +-
> >  gcc/config/i386/i386.c|  41 +++
> >  gcc/config/i386/i386.opt  |   4 +
> >  gcc/config/i386/sse.md|  30 +++--
> >  gcc/doc/invoke.texi   |   7 ++
> >  .../i386/avx2-vector-unaligned-load-store-1.c | 102 +
> >  .../i386/avx2-vector-unaligned-load-store-2.c | 107 ++
> >  .../i386/avx2-vector-unaligned-load-store-3.c |  11 ++
> >  .../avx512vl-vector-unaligned-load-store-1.c  |  13 +++
> >  9 files changed, 287 insertions(+), 31 deletions(-)
> >  create mode 100644 
> > gcc/testsuite/gcc.target/i386/avx2-vector-unaligned-load-store-1.c
> >  create mode 100644 
> > gcc/testsuite/gcc.target/i386/avx2-vector-unaligned-load-store-2.c
> >  create mode 100644 
> > gcc/testsuite/gcc.target/i386/avx2-vector-unaligned-load-store-3.c
> >  create mode 100644 
> > gcc/testsuite/gcc.target/i386/avx512vl-vector-unaligned-load-store-1.c
> >
> > diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c
> > index c9523b26f49..eacbd0f5451 100644
> > --- a/gcc/config/i386/i386-options.c
> > +++ b/gcc/config/i386/i386-options.c
> > @@ -397,7 +397,8 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT 
> > isa2,
> >  { "-mstv", MASK_STV },
> >  { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD },
> >  { "-mavx256-split-unaligned-store",
> > MASK_AVX256_SPLIT_UNALIGNED_STORE },
> > -{ "-mcall-ms2sysv-xlogues",MASK_CALL_MS2SYSV_XLOGUES }
> > +{ "-mcall-ms2sysv-xlogues",MASK_CALL_MS2SYSV_XLOGUES },
> > +{ "-muse-unaligned-vector-move",   MASK_USE_UNALIGNED_VECTOR_MOVE }
> >};
> >
> >/* Additional flag options.  */
> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > index f111411e599..7581e854021 100644
> > --- a/gcc/config/i386/i386.c
> > +++ b/gcc/config/i386/i386.c
> > @@ -5323,8 +5323,9 @@ ix86_get_ssemov (rtx *operands, unsigned size,
> >  enum attr_mode insn_mode, machine_mode mode)
> >  {
> >char buf[128];
> > -  bool misaligned_p = (misaligned_operand (operands[0], mode)
> > -  || misaligned_operand (operands[1], mode));
> > +  bool need_unaligned_p = (TARGET_USE_UNALIGNED_VECTOR_MOVE
> > +  || misaligned_operand (operands[0], mode)
> > +  || misaligned_operand (operands[1], mode));
> >bool evex_reg_p = (size == 64
> >  || EXT_REX_SSE_REG_P (operands[0])
> >  || EXT_REX_SSE_REG_P (operands[1]));
> > @@ -5380,17 +5381,17 @@ ix86_get_ssemov (rtx *operands, unsigned size,
> > {
> > case opcode_int:
> >   if (scalar_mode == E_HFmode)
> > -   opcode = (misaligned_p
> > +   opcode = (need_unaligned_p
> >   ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
> >   : "vmovdqa64");
> >   else
> > -   opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
> > +   opcode = need_unaligned_p ? "vmovdqu32" : "vmovdqa32";
> >   break;
> > case opcode_float:
> > - opcode = misaligned_p ? "vmovups" : "vmovaps";
> > + opcode = need_unaligned_p ? "vmovups" : "vmovaps";
> >   break;
> > case opcode_double:
> > - opcode = misaligned_p ? "vmovupd" : "vmovapd";
> > 

Re: [aarch64] PR102376 - Emit better diagnostic for arch extensions in target attr

2021-10-20 Thread Prathamesh Kulkarni via Gcc-patches
On Tue, 19 Oct 2021 at 19:58, Richard Sandiford
 wrote:
>
> Prathamesh Kulkarni  writes:
> > Hi,
> > The attached patch emits a more verbose diagnostic for target attribute that
> > is an architecture extension needing a leading '+'.
> >
> > For the following test,
> > void calculate(void) __attribute__ ((__target__ ("sve")));
> >
> > With patch, the compiler now emits:
> > 102376.c:1:1: error: arch extension ‘sve’ should be prepended with ‘+’
> > 1 | void calculate(void) __attribute__ ((__target__ ("sve")));
> >   | ^~~~
> >
> > instead of:
> > 102376.c:1:1: error: pragma or attribute ‘target("sve")’ is not valid
> > 1 | void calculate(void) __attribute__ ((__target__ ("sve")));
> >   | ^~~~
>
> Nice :-)
>
> > (This isn't specific to sve though).
> > OK to commit after bootstrap+test ?
> >
> > Thanks,
> > Prathamesh
> >
> > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> > index a9a1800af53..975f7faf968 100644
> > --- a/gcc/config/aarch64/aarch64.c
> > +++ b/gcc/config/aarch64/aarch64.c
> > @@ -17821,7 +17821,16 @@ aarch64_process_target_attr (tree args)
> >num_attrs++;
> >if (!aarch64_process_one_target_attr (token))
> >   {
> > -   error ("pragma or attribute % is not valid", 
> > token);
> > +   /* Check if token is possibly an arch extension without
> > +  leading '+'.  */
> > +   char *str = (char *) xmalloc (strlen (token) + 2);
> > +   str[0] = '+';
> > +   strcpy(str + 1, token);
>
> I think std::string would be better here, e.g.:
>
>   auto with_plus = std::string ("+") + token;
>
> > +   if (aarch64_handle_attr_isa_flags (str))
> > + error("arch extension %<%s%> should be prepended with %<+%>", 
> > token);
>
> Nit: should be a space before the “(”.
>
> In principle, a fixit hint would have been nice here, but I don't think
> we have enough information to provide one.  (Just saying for the record.)
Thanks for the suggestions.
Does the attached patch look OK ?

Thanks,
Prathamesh
>
> Thanks,
> Richard
>
> > +   else
> > + error ("pragma or attribute % is not valid", 
> > token);
> > +   free (str);
> > return false;
> >   }
> >
[aarch64] PR102376 - Emit better diagnostics for arch extension in target 
attribute.

gcc/ChangeLog:
PR target/102376
* config/aarch64/aarch64.c (aarch64_handle_attr_isa_flags): Change str's
type to const char *.
(aarch64_process_target_attr): Check if token is possibly an arch 
extension
without leading '+' and emit diagnostic accordingly.

gcc/testsuite/ChangeLog:
PR target/102376
* gcc.target/aarch64/pr102376.c: New test.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index a9a1800af53..b72079bc466 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -17548,7 +17548,7 @@ aarch64_handle_attr_tune (const char *str)
modified.  */
 
 static bool
-aarch64_handle_attr_isa_flags (char *str)
+aarch64_handle_attr_isa_flags (const char *str)
 {
   enum aarch64_parse_opt_result parse_res;
   uint64_t isa_flags = aarch64_isa_flags;
@@ -17821,7 +17821,13 @@ aarch64_process_target_attr (tree args)
   num_attrs++;
   if (!aarch64_process_one_target_attr (token))
{
- error ("pragma or attribute % is not valid", token);
+ /* Check if token is possibly an arch extension without
+leading '+'.  */
+ auto with_plus = std::string("+") + token;
+ if (aarch64_handle_attr_isa_flags (with_plus.c_str ()))
+   error ("arch extension %<%s%> should be prepended with %<+%>", 
token);
+ else
+   error ("pragma or attribute % is not valid", 
token);
  return false;
}
 
diff --git a/gcc/testsuite/gcc.target/aarch64/pr102376.c 
b/gcc/testsuite/gcc.target/aarch64/pr102376.c
new file mode 100644
index 000..efd15f6ca9b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr102376.c
@@ -0,0 +1,3 @@
+/* { dg-do compile } */
+
+void calculate(void) __attribute__ ((__target__ ("sve"))); /* { dg-error "arch 
extension 'sve' should be prepended with '\\+'" } */


Re: [PATCH] gcc-changelog: Add libffi/ to ignored_prefixes

2021-10-20 Thread Martin Liška

On 10/20/21 01:23, H.J. Lu wrote:

Add libffi/ to ignored_prefixes for syncing with libffi upstream:


Sure, please push it.

Martin


[PATCH] Restore --param=max-fsm-thread-length

2021-10-20 Thread Aldy Hernandez via Gcc-patches
The removal of --param=max-fsm-thread-length is causing code
explosion.  I thought that --param=max-fsm-thread-path-insns was a
better gauge for path profitability than raw BB length, but it turns
out that we don't take into account PHIs when estimating the number of
statements.

In this PR, we have a sequence of very large PHIs that have us
traversing extremely large paths that blow up the compilation.

We could fix this a couple of different ways.  We could avoid
traversing more than a certain number of PHI arguments, or ignore
large PHIs altogether.  The old implementation certainly had this
knob, and we could cut things off before we even got to the ranger.
We could also adjust the instruction estimation to take into account
PHIs, but I'm sure we'll mess something else up in the process ;-).

The easiest thing to do is just restore the knob.

At a later time we could tweak this further, for instance,
disregarding empty blocks in the count.  BTW, this is the reason I
didn't chop things off in the lowlevel registry for all threaders: the
forward threader can't really explore too deep paths, but it could
theoretically get there while threading over empty blocks.

This fixes 102814, 102852, and I bet it solves the Linux kernel cross
compile issue.

I will commit this pending tests on x86-64 Linux.

gcc/ChangeLog:

PR tree-optimization/102814
* doc/invoke.texi: Document --param=max-fsm-thread-length.
* params.opt: Add --param=max-fsm-thread-length.
* tree-ssa-threadbackward.c
(back_threader_profitability::profitable_path_p): Fail on paths
longer than max-fsm-thread-length.
---
 gcc/doc/invoke.texi   | 3 +++
 gcc/params.opt| 4 
 gcc/tree-ssa-threadbackward.c | 9 +
 3 files changed, 16 insertions(+)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 0cc8a8edd05..c93d822431f 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -14468,6 +14468,9 @@ Emit instrumentation calls to __tsan_func_entry() and 
__tsan_func_exit().
 Maximum number of instructions to copy when duplicating blocks on a
 finite state automaton jump thread path.
 
+@item max-fsm-thread-length
+Maximum number of basic blocks on a jump thread path.
+
 @item parloops-chunk-size
 Chunk size of omp schedule for loops parallelized by parloops.
 
diff --git a/gcc/params.opt b/gcc/params.opt
index 06a6fdc9deb..83b3db6fea6 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -533,6 +533,10 @@ The maximum number of nested indirect inlining performed 
by early inliner.
 Common Joined UInteger Var(param_max_fields_for_field_sensitive) Param
 Maximum number of fields in a structure before pointer analysis treats the 
structure as a single variable.
 
+-param=max-fsm-thread-length=
+Common Joined UInteger Var(param_max_fsm_thread_length) Init(10) 
IntegerRange(1, 99) Param Optimization
+Maximum number of basic blocks on a jump thread path.
+
 -param=max-fsm-thread-path-insns=
 Common Joined UInteger Var(param_max_fsm_thread_path_insns) Init(100) 
IntegerRange(1, 99) Param Optimization
 Maximum number of instructions to copy when duplicating blocks on a finite 
state automaton jump thread path.
diff --git a/gcc/tree-ssa-threadbackward.c b/gcc/tree-ssa-threadbackward.c
index 8770be88706..e378adbbf53 100644
--- a/gcc/tree-ssa-threadbackward.c
+++ b/gcc/tree-ssa-threadbackward.c
@@ -620,6 +620,15 @@ back_threader_profitability::profitable_path_p (const 
vec &m_path,
   if (m_path.length () <= 1)
   return false;
 
+  if (m_path.length () > (unsigned) param_max_fsm_thread_length)
+{
+  if (dump_file && (dump_flags & TDF_DETAILS))
+   fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
+"the number of basic blocks on the path "
+"exceeds PARAM_MAX_FSM_THREAD_LENGTH.\n");
+  return false;
+}
+
   int n_insns = 0;
   gimple_stmt_iterator gsi;
   loop_p loop = m_path[0]->loop_father;
-- 
2.31.1



[COMMITTED] Remove unused back_threader_registry::m_threaded_paths.

2021-10-20 Thread Aldy Hernandez via Gcc-patches
Tested on x86-64 Linux.

gcc/ChangeLog:

* tree-ssa-threadbackward.c 
(back_threader_registry::back_threader_registry):
Remove.
(back_threader_registry::register_path): Remove m_threaded_paths.
---
 gcc/tree-ssa-threadbackward.c | 11 +--
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/gcc/tree-ssa-threadbackward.c b/gcc/tree-ssa-threadbackward.c
index e378adbbf53..96422a1390e 100644
--- a/gcc/tree-ssa-threadbackward.c
+++ b/gcc/tree-ssa-threadbackward.c
@@ -52,12 +52,10 @@ along with GCC; see the file COPYING3.  If not see
 class back_threader_registry
 {
 public:
-  back_threader_registry ();
   bool register_path (const vec &, edge taken);
   bool thread_through_all_blocks (bool may_peel_loop_headers);
 private:
   back_jt_path_registry m_lowlevel_registry;
-  int m_threaded_paths;
 };
 
 // Class to abstract the profitability code for the backwards threader.
@@ -574,11 +572,6 @@ back_threader::debug ()
   dump (stderr);
 }
 
-back_threader_registry::back_threader_registry ()
-{
-  m_threaded_paths = 0;
-}
-
 bool
 back_threader_registry::thread_through_all_blocks (bool may_peel_loop_headers)
 {
@@ -928,9 +921,7 @@ back_threader_registry::register_path (const 
vec &m_path,
 
   m_lowlevel_registry.push_edge (jump_thread_path,
 taken_edge, EDGE_NO_COPY_SRC_BLOCK);
-
-  if (m_lowlevel_registry.register_jump_thread (jump_thread_path))
-++m_threaded_paths;
+  m_lowlevel_registry.register_jump_thread (jump_thread_path);
   return true;
 }
 
-- 
2.31.1



[committed] openmp: Fix up struct gomp_work_share handling [PR102838]

2021-10-20 Thread Jakub Jelinek via Gcc-patches
Hi!

If GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC is not defined, the intent was to
treat the split of the structure between first cacheline (64 bytes)
as mostly write-once, use afterwards and second cacheline as rw just
as an optimization.  But as has been reported, with vectorization enabled
at -O2 it can now result in aligned vector 16-byte or larger stores.
When not having posix_memalign/aligned_alloc/memalign or other similar API,
alloc.c emulates it but it needs to allocate extra memory for the dynamic
realignment.
So, for the GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC not defined case, this patch
stops using aligned (64) attribute in the middle of the structure and instead
inserts padding that puts the second half of the structure at offset 64 bytes.

And when GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC is defined, usually it was allocated
as aligned, but for the orphaned case it could still be allocated just with
gomp_malloc without guaranteed proper alignment.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2021-10-20  Jakub Jelinek  

PR libgomp/102838
* libgomp.h (struct gomp_work_share_1st_cacheline): New type.
(struct gomp_work_share): Only use aligned(64) attribute if
GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC is defined, otherwise just
add padding before lock to ensure lock is at offset 64 bytes
into the structure.
(gomp_workshare_struct_check1, gomp_workshare_struct_check2):
New poor man's static assertions.
* work.c (gomp_work_share_start): Use gomp_aligned_alloc instead of
gomp_malloc if GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC.

--- libgomp/libgomp.h.jj2021-10-11 21:54:00.189992429 +0200
+++ libgomp/libgomp.h   2021-10-19 15:54:32.085056239 +0200
@@ -257,6 +257,30 @@ struct gomp_doacross_work_share
   unsigned int shift_counts[];
 };
 
+/* Like struct gomp_work_share, but only the 1st cacheline of it plus
+   flexible array at the end.
+   Keep in sync with struct gomp_work_share.  */
+struct gomp_work_share_1st_cacheline
+{
+  enum gomp_schedule_type sched;
+  int mode;
+  union {
+struct {
+  long chunk_size, end, incr;
+};
+struct {
+  unsigned long long chunk_size_ull, end_ull, incr_ull;
+};
+  };
+  union {
+unsigned *ordered_team_ids;
+struct gomp_doacross_work_share *doacross;
+  };
+  unsigned ordered_num_used, ordered_owner, ordered_cur;
+  struct gomp_work_share *next_alloc;
+  char pad[];
+};
+
 struct gomp_work_share
 {
   /* This member records the SCHEDULE clause to be used for this construct.
@@ -324,7 +348,12 @@ struct gomp_work_share
  are in a different cache line.  */
 
   /* This lock protects the update of the following members.  */
+#ifdef GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC
   gomp_mutex_t lock __attribute__((aligned (64)));
+#else
+  char pad[64 - offsetof (struct gomp_work_share_1st_cacheline, pad)];
+  gomp_mutex_t lock;
+#endif
 
   /* This is the count of the number of threads that have exited the work
  share construct.  If the construct was marked nowait, they have moved on
@@ -362,6 +391,12 @@ struct gomp_work_share
   unsigned inline_ordered_team_ids[0];
 };
 
+extern char gomp_workshare_struct_check1
+  [offsetof (struct gomp_work_share_1st_cacheline, next_alloc)
+   == offsetof (struct gomp_work_share, next_alloc) ? 1 : -1];
+extern char gomp_workshare_struct_check2
+  [offsetof (struct gomp_work_share, lock) == 64 ? 1 : -1];
+
 /* This structure contains all of the thread-local data associated with 
a thread team.  This is the data that must be saved when a thread
encounters a nested PARALLEL construct.  */
--- libgomp/work.c.jj   2021-01-05 00:13:58.260297586 +0100
+++ libgomp/work.c  2021-10-19 15:55:38.955116579 +0200
@@ -191,7 +191,12 @@ gomp_work_share_start (size_t ordered)
   /* Work sharing constructs can be orphaned.  */
   if (team == NULL)
 {
+#ifdef GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC
+  ws = gomp_aligned_alloc (__alignof (struct gomp_work_share),
+  sizeof (*ws));
+#else
   ws = gomp_malloc (sizeof (*ws));
+#endif
   gomp_init_work_share (ws, ordered, 1);
   thr->ts.work_share = ws;
   return true;

Jakub



Re: [PATCH] X86: Add an option -muse-unaligned-vector-move

2021-10-20 Thread Xu Dianhong via Gcc-patches
Thanks for the comments.

> And does it even work?
It works, I checked it in the test case, and when using this option, it can
emit an unaligned vector move.
>I fail to see adjustments to memory operands of
SSE/AVX instructions that have to be aligned
I changed all vector move in "get_ssemov" without checking the move with
memory operands or not.
>and now would need to be
pushed to separate unaligned moves with an extra register?
I think it did not use an extra register. I'm not sure if I got your
question, and this patch just change the final operator of SSE MOVE from
aligned operator to unaligned operator, and I did not change the operands.

On Wed, Oct 20, 2021 at 3:04 PM Richard Biener 
wrote:

> On Wed, Oct 20, 2021 at 9:02 AM Richard Biener
>  wrote:
> >
> > On Wed, Oct 20, 2021 at 7:31 AM dianhong.xu--- via Gcc-patches
> >  wrote:
> > >
> > > From: dianhong xu 
> > >
> > > Add -muse-unaligned-vector-move option to emit unaligned vector move
> > > instructions.
> >
> > Why would you ever want to have such option?!  Should the documentation
> > at least read "emit unaligned vector moves even for aligned storage or
> when
> > using aligned move intrinsics"?
>
> And does it even work?  I fail to see adjustments to memory operands of
> SSE/AVX instructions that have to be aligned and now would need to be
> pushed to separate unaligned moves with an extra register?
>
> Richard.
>
> >
> > Richard.
> >
> > > gcc/ChangeLog:
> > >
> > > * config/i386/i386-options.c (ix86_target_string): Add
> > > -muse-unaligned-vector-move.
> > > * config/i386/i386.c (ix86_get_ssemov): Emit unaligned vector
> if use
> > > the new option.
> > > * config/i386/i386.opt (muse-unaligned-vector-move): New.
> > > * config/i386/sse.md: Emit unaligned vector if use this new
> option
> > > * doc/invoke.texi: Document -muse-unaligned-vector-move
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > * gcc.target/i386/avx2-vector-unaligned-load-store-1.c: New
> test.
> > > * gcc.target/i386/avx2-vector-unaligned-load-store-2.c: New
> test.
> > > * gcc.target/i386/avx2-vector-unaligned-load-store-3.c: New
> test.
> > > * gcc.target/i386/avx512vl-vector-unaligned-load-store-1.c:
> New test.
> > > ---
> > >  gcc/config/i386/i386-options.c|   3 +-
> > >  gcc/config/i386/i386.c|  41 +++
> > >  gcc/config/i386/i386.opt  |   4 +
> > >  gcc/config/i386/sse.md|  30 +++--
> > >  gcc/doc/invoke.texi   |   7 ++
> > >  .../i386/avx2-vector-unaligned-load-store-1.c | 102 +
> > >  .../i386/avx2-vector-unaligned-load-store-2.c | 107 ++
> > >  .../i386/avx2-vector-unaligned-load-store-3.c |  11 ++
> > >  .../avx512vl-vector-unaligned-load-store-1.c  |  13 +++
> > >  9 files changed, 287 insertions(+), 31 deletions(-)
> > >  create mode 100644
> gcc/testsuite/gcc.target/i386/avx2-vector-unaligned-load-store-1.c
> > >  create mode 100644
> gcc/testsuite/gcc.target/i386/avx2-vector-unaligned-load-store-2.c
> > >  create mode 100644
> gcc/testsuite/gcc.target/i386/avx2-vector-unaligned-load-store-3.c
> > >  create mode 100644
> gcc/testsuite/gcc.target/i386/avx512vl-vector-unaligned-load-store-1.c
> > >
> > > diff --git a/gcc/config/i386/i386-options.c
> b/gcc/config/i386/i386-options.c
> > > index c9523b26f49..eacbd0f5451 100644
> > > --- a/gcc/config/i386/i386-options.c
> > > +++ b/gcc/config/i386/i386-options.c
> > > @@ -397,7 +397,8 @@ ix86_target_string (HOST_WIDE_INT isa,
> HOST_WIDE_INT isa2,
> > >  { "-mstv", MASK_STV },
> > >  { "-mavx256-split-unaligned-load",
> MASK_AVX256_SPLIT_UNALIGNED_LOAD },
> > >  { "-mavx256-split-unaligned-store",
> MASK_AVX256_SPLIT_UNALIGNED_STORE },
> > > -{ "-mcall-ms2sysv-xlogues",
> MASK_CALL_MS2SYSV_XLOGUES }
> > > +{ "-mcall-ms2sysv-xlogues",
> MASK_CALL_MS2SYSV_XLOGUES },
> > > +{ "-muse-unaligned-vector-move",   MASK_USE_UNALIGNED_VECTOR_MOVE
> }
> > >};
> > >
> > >/* Additional flag options.  */
> > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > > index f111411e599..7581e854021 100644
> > > --- a/gcc/config/i386/i386.c
> > > +++ b/gcc/config/i386/i386.c
> > > @@ -5323,8 +5323,9 @@ ix86_get_ssemov (rtx *operands, unsigned size,
> > >  enum attr_mode insn_mode, machine_mode mode)
> > >  {
> > >char buf[128];
> > > -  bool misaligned_p = (misaligned_operand (operands[0], mode)
> > > -  || misaligned_operand (operands[1], mode));
> > > +  bool need_unaligned_p = (TARGET_USE_UNALIGNED_VECTOR_MOVE
> > > +  || misaligned_operand (operands[0], mode)
> > > +  || misaligned_operand (operands[1], mode));
> > >bool evex_reg_p = (size == 64
> > >  || EXT_REX_SSE_REG_P (operands[0])
> > >  || EXT_R

Re: [PATCH] X86: Add an option -muse-unaligned-vector-move

2021-10-20 Thread Xu Dianhong via Gcc-patches
Thanks for the comments.

>Why would you ever want to have such option?!
I need to ask @H. J. Lu for help to answer this question. He knows more
about the background. I may not explain it clearly.
>Should the documentation
at least read "emit unaligned vector moves even for aligned storage or when
using aligned move intrinsics"?
Thanks for the mention. I'll add it to the documents later.

On Wed, Oct 20, 2021 at 3:02 PM Richard Biener 
wrote:

> On Wed, Oct 20, 2021 at 7:31 AM dianhong.xu--- via Gcc-patches
>  wrote:
> >
> > From: dianhong xu 
> >
> > Add -muse-unaligned-vector-move option to emit unaligned vector move
> > instructions.
>
> Why would you ever want to have such option?!  Should the documentation
> at least read "emit unaligned vector moves even for aligned storage or when
> using aligned move intrinsics"?
>
> Richard.
>
> > gcc/ChangeLog:
> >
> > * config/i386/i386-options.c (ix86_target_string): Add
> > -muse-unaligned-vector-move.
> > * config/i386/i386.c (ix86_get_ssemov): Emit unaligned vector if
> use
> > the new option.
> > * config/i386/i386.opt (muse-unaligned-vector-move): New.
> > * config/i386/sse.md: Emit unaligned vector if use this new
> option
> > * doc/invoke.texi: Document -muse-unaligned-vector-move
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/i386/avx2-vector-unaligned-load-store-1.c: New test.
> > * gcc.target/i386/avx2-vector-unaligned-load-store-2.c: New test.
> > * gcc.target/i386/avx2-vector-unaligned-load-store-3.c: New test.
> > * gcc.target/i386/avx512vl-vector-unaligned-load-store-1.c: New
> test.
> > ---
> >  gcc/config/i386/i386-options.c|   3 +-
> >  gcc/config/i386/i386.c|  41 +++
> >  gcc/config/i386/i386.opt  |   4 +
> >  gcc/config/i386/sse.md|  30 +++--
> >  gcc/doc/invoke.texi   |   7 ++
> >  .../i386/avx2-vector-unaligned-load-store-1.c | 102 +
> >  .../i386/avx2-vector-unaligned-load-store-2.c | 107 ++
> >  .../i386/avx2-vector-unaligned-load-store-3.c |  11 ++
> >  .../avx512vl-vector-unaligned-load-store-1.c  |  13 +++
> >  9 files changed, 287 insertions(+), 31 deletions(-)
> >  create mode 100644
> gcc/testsuite/gcc.target/i386/avx2-vector-unaligned-load-store-1.c
> >  create mode 100644
> gcc/testsuite/gcc.target/i386/avx2-vector-unaligned-load-store-2.c
> >  create mode 100644
> gcc/testsuite/gcc.target/i386/avx2-vector-unaligned-load-store-3.c
> >  create mode 100644
> gcc/testsuite/gcc.target/i386/avx512vl-vector-unaligned-load-store-1.c
> >
> > diff --git a/gcc/config/i386/i386-options.c
> b/gcc/config/i386/i386-options.c
> > index c9523b26f49..eacbd0f5451 100644
> > --- a/gcc/config/i386/i386-options.c
> > +++ b/gcc/config/i386/i386-options.c
> > @@ -397,7 +397,8 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT
> isa2,
> >  { "-mstv", MASK_STV },
> >  { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD
> },
> >  { "-mavx256-split-unaligned-store",
> MASK_AVX256_SPLIT_UNALIGNED_STORE },
> > -{ "-mcall-ms2sysv-xlogues",
> MASK_CALL_MS2SYSV_XLOGUES }
> > +{ "-mcall-ms2sysv-xlogues",
> MASK_CALL_MS2SYSV_XLOGUES },
> > +{ "-muse-unaligned-vector-move",   MASK_USE_UNALIGNED_VECTOR_MOVE }
> >};
> >
> >/* Additional flag options.  */
> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> > index f111411e599..7581e854021 100644
> > --- a/gcc/config/i386/i386.c
> > +++ b/gcc/config/i386/i386.c
> > @@ -5323,8 +5323,9 @@ ix86_get_ssemov (rtx *operands, unsigned size,
> >  enum attr_mode insn_mode, machine_mode mode)
> >  {
> >char buf[128];
> > -  bool misaligned_p = (misaligned_operand (operands[0], mode)
> > -  || misaligned_operand (operands[1], mode));
> > +  bool need_unaligned_p = (TARGET_USE_UNALIGNED_VECTOR_MOVE
> > +  || misaligned_operand (operands[0], mode)
> > +  || misaligned_operand (operands[1], mode));
> >bool evex_reg_p = (size == 64
> >  || EXT_REX_SSE_REG_P (operands[0])
> >  || EXT_REX_SSE_REG_P (operands[1]));
> > @@ -5380,17 +5381,17 @@ ix86_get_ssemov (rtx *operands, unsigned size,
> > {
> > case opcode_int:
> >   if (scalar_mode == E_HFmode)
> > -   opcode = (misaligned_p
> > +   opcode = (need_unaligned_p
> >   ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
> >   : "vmovdqa64");
> >   else
> > -   opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
> > +   opcode = need_unaligned_p ? "vmovdqu32" : "vmovdqa32";
> >   break;
> > case opcode_float:
> > - opcode = misaligned_p ? "vmovups" : "vmovaps";
> > + opcode = need_unaligned_p ? "vmovups"

Re: [PATCH] X86: Add an option -muse-unaligned-vector-move

2021-10-20 Thread Richard Biener via Gcc-patches
On Wed, Oct 20, 2021 at 9:48 AM Xu Dianhong  wrote:
>
> Thanks for the comments.
>
> > And does it even work?
> It works, I checked it in the test case, and when using this option, it can 
> emit an unaligned vector move.
> >I fail to see adjustments to memory operands of
> SSE/AVX instructions that have to be aligned
> I changed all vector move in "get_ssemov" without checking the move with 
> memory operands or not.
> >and now would need to be
> pushed to separate unaligned moves with an extra register?
> I think it did not use an extra register. I'm not sure if I got your 
> question, and this patch just change the final operator of SSE MOVE from 
> aligned operator to unaligned operator, and I did not change the operands.

For example

typedef double v2df __attribute__((vector_size(16)));

v2df a, b;

void foo ()
{
  a += b;
}

will compile to

foo:
.LFB0:
.cfi_startproc
movapd  a(%rip), %xmm0
addpd   b(%rip), %xmm0
movaps  %xmm0, a(%rip)
ret

what should -muse-unaligned-vector-move do here?  The addpd b(%rip), %xmm0
instruction implies an aligned move from b(%rip).

It looks your patch could be better implemented in the assembler, just using
the unaligned encodings for aligned moves?

Richard.

> On Wed, Oct 20, 2021 at 3:04 PM Richard Biener  
> wrote:
>>
>> On Wed, Oct 20, 2021 at 9:02 AM Richard Biener
>>  wrote:
>> >
>> > On Wed, Oct 20, 2021 at 7:31 AM dianhong.xu--- via Gcc-patches
>> >  wrote:
>> > >
>> > > From: dianhong xu 
>> > >
>> > > Add -muse-unaligned-vector-move option to emit unaligned vector move
> >> > > instructions.
>> >
>> > Why would you ever want to have such option?!  Should the documentation
>> > at least read "emit unaligned vector moves even for aligned storage or when
>> > using aligned move intrinsics"?
>>
>> And does it even work?  I fail to see adjustments to memory operands of
>> SSE/AVX instructions that have to be aligned and now would need to be
>> pushed to separate unaligned moves with an extra register?
>>
>> Richard.
>>
>> >
>> > Richard.
>> >
>> > > gcc/ChangeLog:
>> > >
>> > > * config/i386/i386-options.c (ix86_target_string): Add
>> > > -muse-unaligned-vector-move.
>> > > * config/i386/i386.c (ix86_get_ssemov): Emit unaligned vector if 
>> > > use
>> > > the new option.
>> > > * config/i386/i386.opt (muse-unaligned-vector-move): New.
>> > > * config/i386/sse.md: Emit unaligned vector if use this new 
>> > > option
>> > > * doc/invoke.texi: Document -muse-unaligned-vector-move
>> > >
>> > > gcc/testsuite/ChangeLog:
>> > >
>> > > * gcc.target/i386/avx2-vector-unaligned-load-store-1.c: New test.
>> > > * gcc.target/i386/avx2-vector-unaligned-load-store-2.c: New test.
>> > > * gcc.target/i386/avx2-vector-unaligned-load-store-3.c: New test.
>> > > * gcc.target/i386/avx512vl-vector-unaligned-load-store-1.c: New 
>> > > test.
>> > > ---
>> > >  gcc/config/i386/i386-options.c|   3 +-
>> > >  gcc/config/i386/i386.c|  41 +++
>> > >  gcc/config/i386/i386.opt  |   4 +
>> > >  gcc/config/i386/sse.md|  30 +++--
>> > >  gcc/doc/invoke.texi   |   7 ++
>> > >  .../i386/avx2-vector-unaligned-load-store-1.c | 102 +
>> > >  .../i386/avx2-vector-unaligned-load-store-2.c | 107 ++
>> > >  .../i386/avx2-vector-unaligned-load-store-3.c |  11 ++
>> > >  .../avx512vl-vector-unaligned-load-store-1.c  |  13 +++
>> > >  9 files changed, 287 insertions(+), 31 deletions(-)
>> > >  create mode 100644 
>> > > gcc/testsuite/gcc.target/i386/avx2-vector-unaligned-load-store-1.c
>> > >  create mode 100644 
>> > > gcc/testsuite/gcc.target/i386/avx2-vector-unaligned-load-store-2.c
>> > >  create mode 100644 
>> > > gcc/testsuite/gcc.target/i386/avx2-vector-unaligned-load-store-3.c
>> > >  create mode 100644 
>> > > gcc/testsuite/gcc.target/i386/avx512vl-vector-unaligned-load-store-1.c
>> > >
>> > > diff --git a/gcc/config/i386/i386-options.c 
>> > > b/gcc/config/i386/i386-options.c
>> > > index c9523b26f49..eacbd0f5451 100644
>> > > --- a/gcc/config/i386/i386-options.c
>> > > +++ b/gcc/config/i386/i386-options.c
>> > > @@ -397,7 +397,8 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT 
>> > > isa2,
>> > >  { "-mstv", MASK_STV },
>> > >  { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD 
>> > > },
>> > >  { "-mavx256-split-unaligned-store",
>> > > MASK_AVX256_SPLIT_UNALIGNED_STORE },
>> > > -{ "-mcall-ms2sysv-xlogues",
>> > > MASK_CALL_MS2SYSV_XLOGUES }
>> > > +{ "-mcall-ms2sysv-xlogues",
>> > > MASK_CALL_MS2SYSV_XLOGUES },
>> > > +{ "-muse-unaligned-vector-move",   MASK_USE_UNALIGNED_VECTOR_MOVE }
>> > >};
>> > >
>> > >/* Additional flag options.  */
>> > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386

[RFC] Partial vectors for s390

2021-10-20 Thread Robin Dapp via Gcc-patches

Hi,

I have been playing around with making Kewen's partial vector changes 
workable with s390:


We have a vll instruction that can be passed the highest byte to load. 
The rather unfortunate consequence of this is that a length of zero 
cannot be specified.  The partial vector framework, however, relies a 
lot on the fact that a len_load can be made a NOP using a length of zero.


After confirming an additional zero-check before each vll is definitely 
too slow across SPEC and some discussion with Kewen we figured the 
easiest way forward is to exclude loops with multiple VFs (despite 
giving up vectorization possibilities).  These are prone to len_loads 
with zero while the regular induction variable check prevents them in 
single-VF loops.


So, as a quick hack, I went with

diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 75f24e7c4f6..f79222daeb6 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1170,6 +1170,9 @@ vect_verify_loop_lens (loop_vec_info loop_vinfo)
   if (LOOP_VINFO_LENS (loop_vinfo).is_empty ())
 return false;

+  if (LOOP_VINFO_LENS (loop_vinfo).length () > 1)
+return false;
+

which could be made a hook, eventually.  FWIW this is sufficient to make 
bootstrap, regtest and compiling the SPEC suites succeed.  I'm unsure 
whether we are guaranteed not to emit len_load with zero now.   On top, 
I subtract 1 from the passed length in the expander, which, supposedly, 
is also not ideal.


There are some regressions that I haven't fully analyzed yet but whether 
and when to actually enable this feature could be a backend decision 
with the necessary middle-end checks already in place.


Any ideas on how to properly check for the zero condition and exclude 
the cases that cause it? Kewen suggested enriching the len_load optabs 
with a separate parameter.


Regards
 Robin


[PATCH] Add new replace script.

2021-10-20 Thread Martin Liška

Hello.

Sometimes, one needs a more complex replacement in source files and
an editor can be a weak tool. That has happened to me in the recent
time and so I made the replacement in Python.

The script provides simple API:
- handle_file_p - return True if you want to touch the file
- modify_line - return modified line (if you want)

Are others also interested or should I put it to my internal tools?

Cheers,
Martin

contrib/ChangeLog:

* replace.py: New file.
---
 contrib/replace.py | 92 ++
 1 file changed, 92 insertions(+)
 create mode 100755 contrib/replace.py

diff --git a/contrib/replace.py b/contrib/replace.py
new file mode 100755
index 000..a510c6cd32c
--- /dev/null
+++ b/contrib/replace.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+
+# Copyright (C) 2021 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING.  If not, write to
+# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+#
+# The script can be used for more complex replacements in the source code.
+#
+
+import argparse
+import os
+import re
+import sys
+
+EXTENSIONS = ('.h', '.c', '.cc', '.C')
+
+
+def handle_file_p(filename):
+if 'testsuite' in filename:
+return False
+if all(not filename.endswith(ext) for ext in EXTENSIONS):
+return False
+
+return True
+
+
+def modify_line(line):
+# Example replacement:
+# m = re.match(r'.*time_function\(&([^,]*),', line)
+# if m:
+#e = m.end(1)
+#name = m.group(1)
+#line = line[:e + 1] + f' "{name}",' + line[e + 1:]
+
+return line
+
+
+parser = argparse.ArgumentParser(description='Make a custom replacements '
+ 'for source files')
+parser.add_argument('directory', help='Root directory')
+parser.add_argument('-v', '--verbose', action='store_true',
+help='Verbose output')
+args = parser.parse_args()
+
+visited_files = 0
+modified_files = 0
+
+for root, _, files in os.walk(sys.argv[1]):
+for file in files:
+full = os.path.join(root, file)
+if not handle_file_p(full):
+continue
+
+visited_files += 1
+
+modified = False
+try:
+modified_lines = []
+with open(full) as f:
+lines = f.readlines()
+for line in lines:
+modified_line = modify_line(line)
+if line != modified_line:
+modified = True
+modified_lines.append(line)
+if modified:
+with open(full, 'w') as w:
+w.write('\n'.join(modified_lines))
+modified_files += 1
+if args.verbose:
+print(f'File modified: {full}')
+except UnicodeDecodeError as e:
+print(f'Skipping file: {full} ({e})')
+
+
+print(f'Visited files: {visited_files}')
+print(f'Modified files: {modified_files}')
--
2.33.1



Re: [committed] libstdc++: Implement monadic operations for std::optional (P0798R8)

2021-10-20 Thread Jonathan Wakely via Gcc-patches
On Wed, 20 Oct 2021 at 06:48, Tim Song wrote:
>
> On Tue, Oct 19, 2021 at 9:05 AM Jonathan Wakely via Gcc-patches
>  wrote:
> >
> > +constexpr bool
> > +test_copy_elision()
> > +{
> > +  return true;
> > +}
> > +
> > +static_assert( test_copy_elision() );
> > +
>
> This isn't much of a test :)

The ultimate copy elision. The copies even get removed from the source code.

Thanks for spotting it. I meant to check we're constructing directly
into the union member without materializing the temporary early. I'll
add that today.



Fix PR middle-end/102764

2021-10-20 Thread Eric Botcazou via Gcc-patches
This is a regression present on the mainline in the form of a -fcompare-debug 
failure at -O3 on a compiler-generated testcase.  Fixed by disregarding a 
debug statement in the last position of a basic block to reset the current 
location for the outgoing edges.

Bootstrapped/regtested on x86-64/Linux, applied on the mainline as obvious.


2021-10-20  Eric Botcazou  

PR middle-end/102764
* cfgexpand.c (expand_gimple_basic_block): Disregard a final debug
statement to reset the current location for the outgoing edges.


2021-10-20  Eric Botcazou  

* gcc.dg/pr102764.c: New test.

-- 
Eric Botcazou

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 03260b019e5..8b067f9d848 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -6090,7 +6090,7 @@ expand_gimple_basic_block (basic_block bb, bool disable_tail_calls)
   /* Expand implicit goto and convert goto_locus.  */
   FOR_EACH_EDGE (e, ei, bb->succs)
 {
-  if (e->goto_locus != UNKNOWN_LOCATION || !stmt)
+  if (e->goto_locus != UNKNOWN_LOCATION || !stmt || is_gimple_debug (stmt))
 	set_curr_insn_location (e->goto_locus);
   if ((e->flags & EDGE_FALLTHRU) && e->dest != bb->next_bb)
 	{
/* PR middle-end/102764 */
/* Reported by Chengnian Sun  */

/* { dg-do compile } */
/* { dg-options "-O3 -fcompare-debug" } */

volatile int a;

void main (void)
{
  for (int i = 0; i < 1000; i++)
if (i % 17)
  a++;
}


[Ada] Avoid exception propagation during bootstrap

2021-10-20 Thread Eric Botcazou via Gcc-patches
This addresses PR ada/100486, which is the bootstrap failure of GCC 11 for 32-
bit Windows in the MSYS setup.  The PR shows that we cannot rely on exception 
propagation being operational during the bootstrap, at least on the 11 branch, 
so fix this by removing the problematic raise statement.

Tested on x86-64/Linux, applied on the mainline and 11 branch.


2021-10-20  Arnaud Charlet  

PR ada/100486
* sem_prag.adb (Check_Valid_Library_Unit_Pragma): Do not raise an
exception as part of the bootstrap.

-- 
Eric Botcazou

commit 035e7f25f8e01296da7dce327ef28ba15e1ccd8f
Author: Arnaud Charlet 
Date:   Mon Oct 18 03:35:44 2021 -0400

PR ada/100486 Avoid exception propagation during bootstrap

On some environments, we cannot rely on exception propagation being
operational during the bootstrap, as show by PR ada/100486 on windows
32bits mingw.

Fix this by removing the problematic raise statement.

[changelog]
* sem_prag.adb (Check_Valid_Library_Unit_Pragma): Do not raise an
exception as part of the bootstrap.

Change-Id: I52c541c93fe4f95a67cfc296b821d500bb90341b
TN: UA18-007

diff --git a/sem_prag.adb b/sem_prag.adb
index b3fa32ad5c..1e6397f87b 100644
--- a/sem_prag.adb
+++ b/sem_prag.adb
@@ -4144,8 +4144,10 @@ procedure Check_Valid_Library_Unit_Pragma;
   --  than library level instantiations these can appear in contexts which
   --  would normally be invalid (they only apply to the original template
   --  and to library level instantiations), and they are simply ignored,
-  --  which is implemented by rewriting them as null statements and raising
-  --  exception to terminate analysis.
+  --  which is implemented by rewriting them as null statements and
+  --  optionally raising Pragma_Exit to terminate analysis. An exception
+  --  is not always raised to avoid exception propagation during the
+  --  bootstrap, so all callers should check whether N has been rewritten.
 
   procedure Check_Variant (Variant : Node_Id; UU_Typ : Entity_Id);
   --  Check an Unchecked_Union variant for lack of nested variants and
@@ -6652,8 +6654,14 @@ procedure Check_Valid_Library_Unit_Pragma is
Sindex := Source_Index (Current_Sem_Unit);
 
if Loc not in Source_First (Sindex) .. Source_Last (Sindex) then
+  --  We do not want to raise an exception here since this code
+  --  is part of the bootstrap path where we cannot rely on
+  --  exception propagation working.
+  --  Instead the caller should check for N being rewritten as
+  --  a null statement.
+  --  This code triggers when compiling a-except.adb.
+
   Rewrite (N, Make_Null_Statement (Loc));
-  raise Pragma_Exit;
 
--  If before first declaration, the pragma applies to the
--  enclosing unit, and the name if present must be this name.
@@ -12719,6 +12727,13 @@ procedure Malformed_State_Error (State : Node_Id) is
 Check_Ada_83_Warning;
 Check_Valid_Library_Unit_Pragma;
 
+--  If N was rewritten as a null statement there is nothing more
+--  to do.
+
+if Nkind (N) = N_Null_Statement then
+   return;
+end if;
+
 Lib_Entity := Find_Lib_Unit_Name;
 
 --  A pragma that applies to a Ghost entity becomes Ghost for the
@@ -15967,6 +15982,13 @@ function Is_Acceptable_Dim3 (N : Node_Id) return Boolean is
 Check_Ada_83_Warning;
 Check_Valid_Library_Unit_Pragma;
 
+--  If N was rewritten as a null statement there is nothing more
+--  to do.
+
+if Nkind (N) = N_Null_Statement then
+   return;
+end if;
+
 Cunit_Node := Cunit (Current_Sem_Unit);
 Cunit_Ent  := Cunit_Entity (Current_Sem_Unit);
 
@@ -19650,6 +19672,13 @@ procedure Skip_Spaces is
 GNAT_Pragma;
 Check_Valid_Library_Unit_Pragma;
 
+--  If N was rewritten as a null statement there is nothing more
+--  to do.
+
+if Nkind (N) = N_Null_Statement then
+   return;
+end if;
+
 --  Must appear for a spec or generic spec
 
 if Nkind (Unit (Cunit (Current_Sem_Unit))) not in
@@ -21436,6 +21465,13 @@ procedure Propagate_Part_Of (Pack_Id : Entity_Id) is
 Check_Ada_83_Warning;
 Check_Valid_Library_Unit_Pragma;
 
+--  If N was rewritten as a null statement there is nothing more
+--  to do.
+
+if Nkind (N) = N_Null_Statement then
+   return;
+end if;
+
 Ent := Find_Lib_Unit_Name;
 
 --  A pragma that applies to a Ghost entity becomes Ghost for the
@@ -22072,8 +22108,15 @@ procedure Check_Arg (

Re: [PATCH] hardened conditionals

2021-10-20 Thread Alexandre Oliva via Gcc-patches
On Oct 18, 2021, Richard Biener  wrote:

> Yes, it is (just having had a quick look most of the functions in the
> pass lack function-level comments).

Oh my, I'm so sorry, please accept my apologies.  I stepped away from
this patch for a few weeks, and when I got back to it, I did not realize
it wasn't quite finished yet.  I misled myself because I had already
written the ChangeLog entry, which I normally do as the last thing
before contributing a patch.  Besides the comments, it was missing
preservation of source location information.  I've implemented the
missing bit, and added comments to all functions and then some.

This patch regstrapped successfully on x86_64-linux-gnu.  Unfortunately,
both this patch and the earlier patch, applied onto recent trunk along
with a patch that enables both passes (both now in aoliva/hardcomp), hit
a bootstrap compare error in insn-opinit.c, confirmed with
-fcompare-debug.

I suppose it's a latent issue exposed by the patch, rather than some
problem introduced by the patch, because the earlier patch had
bootstrapped successfully with both passes enabled back then.

I'm yet to investigate this problem, but I'm a little tied up with
something else ATM, and it's likely an unrelated latent problem to be
fixed in a separate patch, so I'm posting this right away, and even
daring ask: ok to install?


This patch introduces optional passes to harden conditionals used in
branches, and in computing boolean expressions, by adding redundant
tests of the reversed conditions, and trapping in case of unexpected
results.  Though in abstract machines the redundant tests should never
fail, CPUs may be led to misbehave under certain kinds of attacks,
such as of power deprivation, and these tests reduce the likelihood of
going too far down an unexpected execution path.


for  gcc/ChangeLog

* common.opt (fharden-compares): New.
(fharden-conditional-branches): New.
* doc/invoke.texi: Document new options.
* gimple-harden-conditionals.cc: New.
* passes.def: Add new passes.
* tree-pass.h (make_pass_harden_compares): Declare.
(make_pass_harden_conditional_branches): Declare.

for  gcc/ada/ChangeLog

* doc/gnat_rm/security_hardening_features.rst
(Hardened Conditionals): New.

for  gcc/testsuite/ChangeLog

* c-c++-common/torture/harden-comp.c: New.
* c-c++-common/torture/harden-cond.c: New.
---
 gcc/Makefile.in|1 
 .../doc/gnat_rm/security_hardening_features.rst|   40 ++
 gcc/common.opt |8 
 gcc/doc/invoke.texi|   19 +
 gcc/gimple-harden-conditionals.cc  |  435 
 gcc/passes.def |2 
 gcc/testsuite/c-c++-common/torture/harden-comp.c   |   14 +
 gcc/testsuite/c-c++-common/torture/harden-cond.c   |   18 +
 gcc/tree-pass.h|3 
 9 files changed, 540 insertions(+)
 create mode 100644 gcc/gimple-harden-conditionals.cc
 create mode 100644 gcc/testsuite/c-c++-common/torture/harden-comp.c
 create mode 100644 gcc/testsuite/c-c++-common/torture/harden-cond.c

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index f36ffa4740b78..a79ff93dd5999 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1389,6 +1389,7 @@ OBJS = \
gimple-if-to-switch.o \
gimple-iterator.o \
gimple-fold.o \
+   gimple-harden-conditionals.o \
gimple-laddress.o \
gimple-loop-interchange.o \
gimple-loop-jam.o \
diff --git a/gcc/ada/doc/gnat_rm/security_hardening_features.rst 
b/gcc/ada/doc/gnat_rm/security_hardening_features.rst
index 1c46e3a4c7b88..52240d7e3dd54 100644
--- a/gcc/ada/doc/gnat_rm/security_hardening_features.rst
+++ b/gcc/ada/doc/gnat_rm/security_hardening_features.rst
@@ -87,3 +87,43 @@ types and subtypes, may be silently ignored.  Specifically, 
it is not
 currently recommended to rely on any effects this pragma might be
 expected to have when calling subprograms through access-to-subprogram
 variables.
+
+
+.. Hardened Conditionals:
+
+Hardened Conditionals
+=
+
+GNAT can harden conditionals to protect against control flow attacks.
+
+This is accomplished by two complementary transformations, each
+activated by a separate command-line option.
+
+The option *-fharden-compares* enables hardening of compares that
+compute results stored in variables, adding verification that the
+reversed compare yields the opposite result.
+
+The option *-fharden-conditional-branches* enables hardening of
+compares that guard conditional branches, adding verification of the
+reversed compare to both execution paths.
+
+These transformations are introduced late in the compilation pipeline,
+long after boolean expressions are decomposed into separate compares,
+each one turned into either a conditional branch or a compare whose
+result is stored in a boolean 

Re: [PATCH] options: Fix variable tracking option processing.

2021-10-20 Thread Martin Liška

On 10/19/21 12:53, Richard Biener wrote:

Meh ... :/

Well, move the target override hook call down (try to shuffle things
so diagnostics happen after but
"inits" happen before).


Not so easy. There are direct usages of the hooks
(influences dwarf2out_as_loc_support and dwarf2out_as_locview_support)

  if (!OPTION_SET_P (dwarf2out_as_loc_support))
dwarf2out_as_loc_support = dwarf2out_default_as_loc_support ();
  if (!OPTION_SET_P (dwarf2out_as_locview_support))
dwarf2out_as_locview_support = dwarf2out_default_as_locview_support ();

  if (!OPTION_SET_P (debug_variable_location_views))
{
  debug_variable_location_views
= (flag_var_tracking
   && debug_info_level >= DINFO_LEVEL_NORMAL
   && dwarf_debuginfo_p ()
   && !dwarf_strict
   && dwarf2out_as_loc_support
   && dwarf2out_as_locview_support);
}

and then the warnings depend on debug_variable_location_views.

I have another attempt which is about moving option detection of 
debug_nonbind_markers_p
to finish_options. That works fine, except one needs to mark the option as 
PerFunction.
That's because it depends on 'optimize' and that would trigger:
'global_options are modified in local context' verification error.

What do you think about the patch?
Cheers,
MartinFrom 1fbeeb6bb326c6e9e704be7a99b69014b1104fda Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Thu, 14 Oct 2021 14:57:18 +0200
Subject: [PATCH] options: Fix variable tracking option processing.

	PR debug/102585
	PR bootstrap/102766

gcc/ChangeLog:

	* opts.c (finish_options): Process flag_var_tracking* options
	here as they can be adjusted by optimize attribute.
	* toplev.c (process_options): Remove it here.
	* common.opt: Make debug_nonbind_markers_p as PerFunction
	attribute as it depends on optimization level.

gcc/testsuite/ChangeLog:

	* gcc.dg/pr102585.c: New test.
---
 gcc/common.opt  |  2 +-
 gcc/opts.c  | 26 ++
 gcc/testsuite/gcc.dg/pr102585.c |  6 ++
 gcc/toplev.c| 28 +---
 4 files changed, 34 insertions(+), 28 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr102585.c

diff --git a/gcc/common.opt b/gcc/common.opt
index a2af7fb36e0..c4a77f65aa2 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3284,7 +3284,7 @@ Common Driver JoinedOrMissing Negative(gvms)
 Generate debug information in extended STABS format.
 
 gstatement-frontiers
-Common Driver Var(debug_nonbind_markers_p)
+Common Driver Var(debug_nonbind_markers_p) PerFunction
 Emit progressive recommended breakpoint locations.
 
 gstrict-dwarf
diff --git a/gcc/opts.c b/gcc/opts.c
index 65fe192a198..2dd1e5d5372 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -1349,6 +1349,32 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
 SET_OPTION_IF_UNSET (opts, opts_set, flag_vect_cost_model,
 			 VECT_COST_MODEL_CHEAP);
 
+  /* One could use EnabledBy, but it would lead to a circular dependency.  */
+  if (!OPTION_SET_P (flag_var_tracking_uninit))
+ flag_var_tracking_uninit = flag_var_tracking;
+
+  if (!OPTION_SET_P (flag_var_tracking_assignments))
+flag_var_tracking_assignments
+  = (flag_var_tracking
+	 && !(flag_selective_scheduling || flag_selective_scheduling2));
+
+  if (flag_var_tracking_assignments_toggle)
+flag_var_tracking_assignments = !flag_var_tracking_assignments;
+
+  if (flag_var_tracking_assignments && !flag_var_tracking)
+flag_var_tracking = flag_var_tracking_assignments = -1;
+
+  if (flag_var_tracking_assignments
+  && (flag_selective_scheduling || flag_selective_scheduling2))
+warning_at (loc, 0,
+		"var-tracking-assignments changes selective scheduling");
+
+  if (!OPTION_SET_P (debug_nonbind_markers_p))
+debug_nonbind_markers_p
+  = (optimize
+	 && debug_info_level >= DINFO_LEVEL_NORMAL
+	 && dwarf_debuginfo_p ()
+	 && !(flag_selective_scheduling || flag_selective_scheduling2));
 }
 
 #define LEFT_COLUMN	27
diff --git a/gcc/testsuite/gcc.dg/pr102585.c b/gcc/testsuite/gcc.dg/pr102585.c
new file mode 100644
index 000..efd066b4a4e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr102585.c
@@ -0,0 +1,6 @@
+/* PR debug/102585 */
+/* { dg-do compile } */
+/* { dg-options "-fvar-tracking-assignments -fno-var-tracking" } */
+
+#pragma GCC optimize 0
+void d_demangle_callback_Og() { int c = 0; }
diff --git a/gcc/toplev.c b/gcc/toplev.c
index cb4f8c470f0..67f921afb3e 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -1499,6 +1499,7 @@ process_options (bool no_backend)
 	}
   flag_var_tracking = 0;
   flag_var_tracking_uninit = 0;
+  flag_var_tracking_assignments = 0;
 }
 
   /* The debug hooks are used to implement -fdump-go-spec because it
@@ -1507,33 +1508,6 @@ process_options (bool no_backend)
   if (flag_dump_go_spec != NULL)
 debug_hooks = dump_go_spec_init (flag_dump_go_spec, debug_hooks);
 
-  /* One could use EnabledBy, but it would lead to a circular dependency.  */

[PATCH, rs6000] Disable gimple fold for float or double vec_minmax when fast-math is not set

2021-10-20 Thread HAO CHEN GUI via Gcc-patches

Hi,

This patch disables gimple folding for float or double vec_min/max when 
fast-math is not set. It makes vec_min/max conform with the guide.

Bootstrapped and tested on powerpc64le-linux with no regressions. Is this okay 
for trunk? Any recommendations? Thanks a lot.

  I refined the patch according to reviewers' advice. The attachments are the 
ChangeLog and patch diff in case the email body is messed up.


ChangeLog

2021-10-20 Haochen Gui 

gcc/
    * config/rs6000/rs6000-call.c (rs6000_gimple_fold_builtin):
    Disable gimple fold for VSX_BUILTIN_XVMINDP, ALTIVEC_BUILTIN_VMINFP,
    VSX_BUILTIN_XVMAXDP, ALTIVEC_BUILTIN_VMAXFP when fast-math is not
    set.

gcc/testsuite/
    * gcc.target/powerpc/vec-minmax-1.c: New test.
    * gcc.target/powerpc/vec-minmax-2.c: Likewise.


patch.diff

diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index b4e13af4dc6..51c7ba447c3 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -12159,6 +12159,14 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   return true;
 /* flavors of vec_min.  */
 case VSX_BUILTIN_XVMINDP:
+    case ALTIVEC_BUILTIN_VMINFP:
+  {
+   lhs = gimple_call_lhs (stmt);
+   tree type = TREE_TYPE (lhs);
+   if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
+ return false;
+   gcc_fallthrough ();
+  }
 case P8V_BUILTIN_VMINSD:
 case P8V_BUILTIN_VMINUD:
 case ALTIVEC_BUILTIN_VMINSB:
@@ -12167,7 +12175,6 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 case ALTIVEC_BUILTIN_VMINUB:
 case ALTIVEC_BUILTIN_VMINUH:
 case ALTIVEC_BUILTIN_VMINUW:
-    case ALTIVEC_BUILTIN_VMINFP:
   arg0 = gimple_call_arg (stmt, 0);
   arg1 = gimple_call_arg (stmt, 1);
   lhs = gimple_call_lhs (stmt);
@@ -12177,6 +12184,14 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   return true;
 /* flavors of vec_max.  */
 case VSX_BUILTIN_XVMAXDP:
+    case ALTIVEC_BUILTIN_VMAXFP:
+  {
+   lhs = gimple_call_lhs (stmt);
+   tree type = TREE_TYPE (lhs);
+   if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
+ return false;
+   gcc_fallthrough ();
+  }
 case P8V_BUILTIN_VMAXSD:
 case P8V_BUILTIN_VMAXUD:
 case ALTIVEC_BUILTIN_VMAXSB:
@@ -12185,7 +12200,6 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 case ALTIVEC_BUILTIN_VMAXUB:
 case ALTIVEC_BUILTIN_VMAXUH:
 case ALTIVEC_BUILTIN_VMAXUW:
-    case ALTIVEC_BUILTIN_VMAXFP:
   arg0 = gimple_call_arg (stmt, 0);
   arg1 = gimple_call_arg (stmt, 1);
   lhs = gimple_call_lhs (stmt);
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-minmax-1.c 
b/gcc/testsuite/gcc.target/powerpc/vec-minmax-1.c
new file mode 100644
index 000..e238659c9be
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-minmax-1.c
@@ -0,0 +1,52 @@
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9" } */
+/* { dg-final { scan-assembler-times {\mxvmaxdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvmaxsp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvmindp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvminsp\M} 1 } } */
+
+/* This test verifies that float or double vec_min/max are bound to
+   xv[min|max][d|s]p instructions when fast-math is not set.  */
+
+
+#include 
+
+#ifdef _BIG_ENDIAN
+   const int PREF_D = 0;
+#else
+   const int PREF_D = 1;
+#endif
+
+double vmaxd (double a, double b)
+{
+  vector double va = vec_promote (a, PREF_D);
+  vector double vb = vec_promote (b, PREF_D);
+  return vec_extract (vec_max (va, vb), PREF_D);
+}
+
+double vmind (double a, double b)
+{
+  vector double va = vec_promote (a, PREF_D);
+  vector double vb = vec_promote (b, PREF_D);
+  return vec_extract (vec_min (va, vb), PREF_D);
+}
+
+#ifdef _BIG_ENDIAN
+   const int PREF_F = 0;
+#else
+   const int PREF_F = 3;
+#endif
+
+float vmaxf (float a, float b)
+{
+  vector float va = vec_promote (a, PREF_F);
+  vector float vb = vec_promote (b, PREF_F);
+  return vec_extract (vec_max (va, vb), PREF_F);
+}
+
+float vminf (float a, float b)
+{
+  vector float va = vec_promote (a, PREF_F);
+  vector float vb = vec_promote (b, PREF_F);
+  return vec_extract (vec_min (va, vb), PREF_F);
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-minmax-2.c 
b/gcc/testsuite/gcc.target/powerpc/vec-minmax-2.c
new file mode 100644
index 000..149275d8709
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-minmax-2.c
@@ -0,0 +1,50 @@
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9 -ffast-math" } */
+/* { dg-final { scan-assembler-times {\mxsmaxcdp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxsmincdp\M} 2 } } */
+
+/* This test verifies that float or double vec_min/max can be converted
+   to scalar comparison when fast-math is set.  */
+
+
+#include 
+
+#ifdef _B

Re: [RFC] Partial vectors for s390

2021-10-20 Thread Richard Sandiford via Gcc-patches
Robin Dapp via Gcc-patches  writes:
> Hi,
>
> I have been playing around with making Kewen's partial vector changes 
> workable with s390:
>
> We have a vll instruction that can be passed the highest byte to load. 
> The rather unfortunate consequence of this is that a length of zero 
> cannot be specified.  The partial vector framework, however, relies a 
> lot on the fact that a len_load can be made a NOP using a length of zero.
>
> After confirming an additional zero-check before each vll is definitely 
> too slow across SPEC and some discussion with Kewen we figured the 
> easiest way forward is to exclude loops with multiple VFs (despite 
> giving up vectorization possibilities).  These are prone to len_loads 
> with zero while the regular induction variable check prevents them in 
> single-VF loops.
>
> So, as a quick hack, I went with
>
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index 75f24e7c4f6..f79222daeb6 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -1170,6 +1170,9 @@ vect_verify_loop_lens (loop_vec_info loop_vinfo)
> if (LOOP_VINFO_LENS (loop_vinfo).is_empty ())
>   return false;
>
> +  if (LOOP_VINFO_LENS (loop_vinfo).length () > 1)
> +return false;
> +

Yeah, I think this should be sufficient.

> which could be made a hook, eventually.  FWIW this is sufficient to make 
> bootstrap, regtest and compiling the SPEC suites succeed.  I'm unsure 
> whether we are guaranteed not to emit len_load with zero now.   On top, 
> I subtract 1 from the passed length in the expander, which, supposedly, 
> is also not ideal.

Exposing the subtraction in gimple would certainly allow for
more optimisation.

We already have code to probe the predicates of the underlying
define_expands/insns to see whether they support certain constant
IFN arguments; see e.g. internal_gather_scatter_fn_supported_p.
We could do something similar here: add an extra operand to the optab,
and an extra argument to the IFN, that gives a bias amount.
The PowerPC version would require 0, the System Z version would
require -1.  The vectoriser would probe to see which value
it should use.

Doing it that way ensures that the gimple is still self-describing.
It avoids gimple semantics depending on target hooks.

> There are some regressions that I haven't fully analyzed yet but whether 
> and when to actually enable this feature could be a backend decision 
> with the necessary middle-end checks already in place.
>
> Any ideas on how to properly check for the zero condition and exclude 
> the cases that cause it? Kewen suggested enriching the len_load optabs 
> with a separate parameter.

Yeah, I think that'd be a good approach.  A bias of -1 would indicate
that the target can't cope with zero lengths.

Thanks,
Richard


Re: [PATCH] libstdc++: Add support for POWER9 DARN instruction to std::random_device

2021-10-20 Thread Jonathan Wakely via Gcc-patches

On 19/10/21 17:47 +0100, Jonathan Wakely wrote:

The ISA-3.0 instruction set includes DARN ("deliver a random number")
which can be used similar to the existing support for RDRAND and RDSEED.

libstdc++-v3/ChangeLog:

* src/c++11/random.cc (USE_DARN): Define.
(__ppc_darn): New function to use POWER9 DARN instruction.
(Which): Add 'darn' enumerator.
(which_source): Check for __ppc_darn.
(random_device::_M_init): Support "darn" and "hw" tokens.
(random_device::_M_getentropy): Add darn to switch.
* testsuite/26_numerics/random/random_device/cons/token.cc:
Check "darn" token.
* testsuite/26_numerics/random/random_device/entropy.cc:
Likewise.

Tested powerpc64le-linux (power8 and power9) and x86_64-linux.

The new "darn" (power-specific) and "hw" (x86 and power)
strings should be documented, but I'll do that if this gets committed.

Most of this patch is just "more of the same", similar to the existing
code for RDRAND and RDSEED on x86, but the parts of the patch I'd like
more eyes on are:


+#elif defined __powerpc__ && defined __BUILTIN_CPU_SUPPORTS__
+# define USE_DARN 1
#endif


This means DARN can only be used when __builtin_cpu_supports is
available, which means glibc 2.23 ... is that acceptable? It means
RHEL 7 wouldn't be able to use DARN, but RHEL 8 would.

There certainly are POWER9 machines running RHEL 7 and similar
vintages (the GCC compile farm has one) so if there's another way to
check for ISA 3.0 then I could use that.

If __POWER9_VECTOR__ is defined when building libstdc++, presumably
that means the whole library can only be run on POWER9 hardware. So
would that mean we don't need to check __builtin_cpu_supports("darn")
when __POWER9_VECTOR__ is defined? Or is it possible to build with
-mcpu=power8 -mpower9-vector and run it on h/w without the DARN
instruction?

Also, I forgot to add a configure check that the assembler supports
darn, which is another prerequisite for using it here.


@@ -135,6 +137,15 @@ namespace std _GLIBCXX_VISIBILITY(default)
#endif
#endif

+#ifdef USE_DARN
+unsigned int
+__attribute__((target("power9")))


Oops, that should be "cpu=power9".

With that change it works on a POWER9 machine (9009-42A) with glibc
2.34 and binutils 2.35.



+__ppc_darn(void*)
+{
+  return __builtin_darn_32();
+}
+#endif




Re: [RFC] Remove VRP threader passes in exchange for better threading pre-VRP.

2021-10-20 Thread Aldy Hernandez via Gcc-patches
On Wed, Oct 20, 2021 at 1:00 AM Jeff Law  wrote:
>
>
>
> On 10/18/2021 8:03 AM, Aldy Hernandez wrote:
> >
> >
> > On 10/18/21 3:41 PM, Aldy Hernandez wrote:
> >
> >> I've been experimenting with reducing the total number of threading
> >> passes, and I'd like to see if there's consensus/stomach for altering
> >> the pipeline.  Note, that the goal is to remove forward threader
> >> clients,
> >> not the other way around.  So, we should prefer to remove a VRP threader
> >> instance over a *.thread one immediately before VRP.
> >>
> >> After some playing, it looks like if we enable fully-resolving mode in
> >> the *.thread passes immediately preceeding VRP, we can remove the VRP
> >> threading passes altogether, thus removing 2 threading passes (and
> >> forward threading passes at that!).
> >
> > It occurs to me that we could also remove the threading before VRP
> > passes, and enable a fully-resolving backward threader after VRP. I
> > haven't played with this scenario, but it should be just as good.
> > That being said, I don't know the intricacies of why we had both pre
> > and post VRP threading passes, and if one is ideally better than the
> > other.
> The only post-VRP threading pass that (in my mind) makes sense is the
> one sitting between VRP and DOM and it should replace the DOM based
> threader.

Yes, that's the goal, but it won't happen on this release because of
floats.  The DOM threader uses the const/avails machinery to thread
conditionals involving floats, something the path solver can't do
because it depends on gori/ranger.  Adding floats to ranger is
probably our #1 task for the next cycle.

Now before Andrew gets clever, the relation oracle is technically type
agnostic, so it could theoretically be possible to use it in the DOM
threader and replace all the const/avails stuff.  But I'd like to go
on vacation at some point ;-).

Aldy



PING^5 [PATCH v2] combine: Tweak the condition of last_set invalidation

2021-10-20 Thread Kewen.Lin via Gcc-patches
Hi,

Gentle ping this:

https://gcc.gnu.org/pipermail/gcc-patches/2021-June/572555.html

BR,
Kewen

> 
 on 2021/6/11 下午9:16, Kewen.Lin via Gcc-patches wrote:
> Hi Segher,
>
> Thanks for the review!
>
> on 2021/6/10 上午4:17, Segher Boessenkool wrote:
>> Hi!
>>
>> On Wed, Dec 16, 2020 at 04:49:49PM +0800, Kewen.Lin wrote:
>>> Currently we have the check:
>>>
>>>   if (!insn
>>>   || (value && rsp->last_set_table_tick >= 
>>> label_tick_ebb_start))
>>> rsp->last_set_invalid = 1; 
>>>
>>> which means if we want to record some value for some reg and
> this reg got referred to before in a valid scope,
>>
>> If we already know it is *set* in this same extended basic block.
>> Possibly by the same instruction btw.
>>
>>> we invalidate the
>>> set of reg (last_set_invalid to 1).  It avoids to find the wrong
>>> set for one reg reference, such as the case like:
>>>
>>>... op regX  // this regX could find wrong last_set below
>>>regX = ...   // if we think this set is valid
>>>... op regX
>>
>> Yup, exactly.
>>
>>> But because of retry's existence, the last_set_table_tick could
>>> be set by some later reference insns, but we see it's set due
>>> to retry on the set (for that reg) insn again, such as:
>>>
>>>insn 1
>>>insn 2
>>>
>>>regX = ... --> (a)
>>>... op regX--> (b)
>>>
>>>insn 3
>>>
>>>// assume all in the same BB.
>>>
>>> Assuming we combine 1, 2 -> 3 successfully and replace them as two
>>> (3 insns -> 2 insns),
>>
>> This will delete insn 1 and write the combined result to insns 2 and 3.
>>
>>> retrying from insn1 or insn2 again:
>>
>> Always 2, but your point remains valid.
>>
>>> it will scan insn (a) again, the below condition holds for regX:
>>>
>>>   (value && rsp->last_set_table_tick >= label_tick_ebb_start)
>>>
>>> it will mark this set as invalid set.  But actually the
>>> last_set_table_tick here is set by insn (b) before retrying, so it
>>> should be safe to be taken as valid set.
>>
>> Yup.
>>
>>> This proposal is to check whether the last_set_table safely happens
>>> after the current set, make the set still valid if so.
>>
>>> Full SPEC2017 building shows this patch gets more successful combines
>>> from 1902208 to 1902243 (trivial though).
>>
>> Do you have some example, or maybe even a testcase?  :-)
>>
>
> Sorry for the late reply, it took some time to get one reduced case.
>
> typedef struct SA *pa_t;
>
> struct SC {
>   int h;
>   pa_t elem[];
> };
>
> struct SD {
>   struct SC *e;
> };
>
> struct SA {
>   struct {
> struct SD f[1];
>   } g;
> };
>
> void foo(pa_t *k, char **m) {
>   int l, i;
>   pa_t a;
>   l = (int)a->g.f[5].e;
>   i = 0;
>   for (; i < l; i++) {
> k[i] = a->g.f[5].e->elem[i];
> m[i] = "";
>   }
> }
>
> Baseline is r12-0 and the option is "-O3 -mcpu=power9 
> -fno-strict-aliasing",
> with this patch, the generated assembly can save two rlwinm s.
>
>>> +  /* Record the luid of the insn whose expression involving register 
>>> n.  */
>>> +
>>> +  int  last_set_table_luid;
>>
>> "Record the luid of the insn for which last_set_table_tick was set",
>> right?
>>
>
> But it can be updated later to one smaller luid, how about the wording 
> like:
>
>
> +  /* Record the luid of the insn which uses register n, the insn should
> + be the first one using register n in that block of the insn which
> + last_set_table_tick was set for.  */
>
>
>>> -static void update_table_tick (rtx);
>>> +static void update_table_tick (rtx, int);
>>
>> Please remove this declaration instead, the function is not used until
>> after its actual definition :-)
>>
>
> Done.
>
>>> @@ -13243,7 +13247,21 @@ update_table_tick (rtx x)
>>>for (r = regno; r < endregno; r++)
>>> {
>>>   reg_stat_type *rsp = ®_stat[r];
>>> - rsp->last_set_table_tick = label_tick;
>>> + if (rsp->last_set_table_tick >= label_tick_ebb_start)
>>> +   {
>>> + /* Later references should not have lower ticks.  */
>>> + gcc_assert (label_tick >= rsp->last_set_table_tick);
>>
>> This should be obvious, but checking it won't hurt, okay.
>>
>>> + /* Should pick up the lowest luid if the references
>>> +are in the same block.  */
>>> + if (label_tick == rsp->last_set_table_tick
>>> + && rsp->last_set_table_luid > insn_l

PING^2 [PATCH v2] rs6000: Modify the way for extra penalized cost

2021-10-20 Thread Kewen.Lin via Gcc-patches
Hi,

Gentle ping this:

https://gcc.gnu.org/pipermail/gcc-patches/2021-September/580358.html

BR,
Kewen

> on 2021/9/28 下午4:16, Kewen.Lin via Gcc-patches wrote:
>> Hi,
>>
>> This patch follows the discussions here[1][2], where Segher
>> pointed out the existing way to guard the extra penalized
>> cost for strided/elementwise loads with a magic bound does
>> not scale.
>>
>> The way with nunits * stmt_cost can get one much
>> exaggerated penalized cost, such as: for V16QI on P8, it's
>> 16 * 20 = 320, that's why we need one bound.  To make it
>> better and more readable, the penalized cost is simplified
>> as:
>>
>> unsigned adjusted_cost = (nunits == 2) ? 2 : 1;
>> unsigned extra_cost = nunits * adjusted_cost;
>>
>> For V2DI/V2DF, it uses 2 penalized cost for each scalar load
>> while for the other modes, it uses 1.  It's mainly concluded
>> from the performance evaluations.  One thing that might
>> be related is that: More units vector gets constructed, more
>> instructions are used.  It has more chances to schedule them
>> better (even run in parallel when enough available units
>> at that time), so it seems reasonable not to penalize more
>> for them.
>>
>> The SPEC2017 evaluations on Power8/Power9/Power10 at option
>> sets O2-vect and Ofast-unroll show this change is neutral.
>>
>> Bootstrapped and regress-tested on powerpc64le-linux-gnu Power9.
>>
>> Is it ok for trunk?
>>
>> [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579121.html
>> [2] https://gcc.gnu.org/pipermail/gcc-patches/2021-September/580099.html
>> v1: https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579529.html
>>
>> BR,
>> Kewen
>> -
>> gcc/ChangeLog:
>>
>>  * config/rs6000/rs6000.c (rs6000_update_target_cost_per_stmt): Adjust
>>  the way to compute extra penalized cost.  Remove useless parameter.
>>  (rs6000_add_stmt_cost): Adjust the call to function
>>  rs6000_update_target_cost_per_stmt.
>>
>>
>> ---
>>  gcc/config/rs6000/rs6000.c | 31 ++-
>>  1 file changed, 18 insertions(+), 13 deletions(-)
>>
>> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
>> index dd42b0964f1..8200e1152c2 100644
>> --- a/gcc/config/rs6000/rs6000.c
>> +++ b/gcc/config/rs6000/rs6000.c
>> @@ -5422,7 +5422,6 @@ rs6000_update_target_cost_per_stmt (rs6000_cost_data 
>> *data,
>>  enum vect_cost_for_stmt kind,
>>  struct _stmt_vec_info *stmt_info,
>>  enum vect_cost_model_location where,
>> -int stmt_cost,
>>  unsigned int orig_count)
>>  {
>>
>> @@ -5462,17 +5461,23 @@ rs6000_update_target_cost_per_stmt (rs6000_cost_data 
>> *data,
>>  {
>>tree vectype = STMT_VINFO_VECTYPE (stmt_info);
>>unsigned int nunits = vect_nunits_for_cost (vectype);
>> -  unsigned int extra_cost = nunits * stmt_cost;
>> -  /* As function rs6000_builtin_vectorization_cost shows, we have
>> - priced much on V16QI/V8HI vector construction as their units,
>> - if we penalize them with nunits * stmt_cost, it can result in
>> - an unreliable body cost, eg: for V16QI on Power8, stmt_cost
>> - is 20 and nunits is 16, the extra cost is 320 which looks
>> - much exaggerated.  So let's use one maximum bound for the
>> - extra penalized cost for vector construction here.  */
>> -  const unsigned int MAX_PENALIZED_COST_FOR_CTOR = 12;
>> -  if (extra_cost > MAX_PENALIZED_COST_FOR_CTOR)
>> -extra_cost = MAX_PENALIZED_COST_FOR_CTOR;
>> +  /* Don't expect strided/elementwise loads for just 1 nunit.  */
>> +  gcc_assert (nunits > 1);
>> +  /* i386 port adopts nunits * stmt_cost as the penalized cost
>> + for this kind of penalization, we used to follow it but
>> + found it could result in an unreliable body cost especially
>> + for V16QI/V8HI modes.  To make it better, we choose this
>> + new heuristic: for each scalar load, we use 2 as penalized
>> + cost for the case with 2 nunits and use 1 for the other
>> + cases.  It's without much supporting theory, mainly
>> + concluded from the broad performance evaluations on Power8,
>> + Power9 and Power10.  One possibly related point is that:
>> + vector construction for more units would use more insns,
>> + it has more chances to schedule them better (even run in
>> + parallelly when enough available units at that time), so
>> + it seems reasonable not to penalize that much for them.  */
>> +  unsigned int adjusted_cost = (nunits == 2) ? 2 : 1;
>> +  unsigned int extra_cost = nunits * adjusted_cost;
>>data->extra_ctor_cost += extra_cost;
>>  }
>>  }
>> @@ -5510,7 +5515,7 @@ rs6000_add_stmt_cost (class vec_info *vinfo, void 
>> *data, int count,
>>cost_data->cost[where] += retval;
>>
>

PING^4 [PATCH] rs6000: Fix some issues in rs6000_can_inline_p [PR102059]

2021-10-20 Thread Kewen.Lin via Gcc-patches
Hi,

Gentle ping this patch:

https://gcc.gnu.org/pipermail/gcc-patches/2021-September/578552.html

One related patch [1] is ready to commit, whose test cases rely on
this patch if no changes are applied to them.

[1] https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579658.html

BR,
Kewen

>>> on 2021/9/1 下午2:55, Kewen.Lin via Gcc-patches wrote:
 Hi!

 This patch is to fix the inconsistent behaviors for non-LTO mode
 and LTO mode.  As Martin pointed out, currently the function
 rs6000_can_inline_p simply makes it inlinable if callee_tree is
 NULL, but it's wrong, we should use the command line options
 from target_option_default_node as default.  It also replaces
 rs6000_isa_flags with the one from target_option_default_node
 when caller_tree is NULL as rs6000_isa_flags could probably
 change since initialization.

 It also extends the scope of the check for the case that callee
 has explicit set options, for test case pr102059-2.c inlining can
 happen unexpectedly before, it's fixed accordingly.

 As Richi/Mike pointed out, some tuning flags like MASK_P8_FUSION
 can be neglected for inlining, this patch also excludes them when
 the callee is attributed by always_inline.

 Bootstrapped and regtested on powerpc64le-linux-gnu Power9.

 BR,
 Kewen
 -
 gcc/ChangeLog:

PR ipa/102059
* config/rs6000/rs6000.c (rs6000_can_inline_p): Adjust with
target_option_default_node and consider always_inline_safe flags.

 gcc/testsuite/ChangeLog:

PR ipa/102059
* gcc.target/powerpc/pr102059-1.c: New test.
* gcc.target/powerpc/pr102059-2.c: New test.
* gcc.target/powerpc/pr102059-3.c: New test.
* gcc.target/powerpc/pr102059-4.c: New test.

>>>


Re: [aarch64] PR102376 - Emit better diagnostic for arch extensions in target attr

2021-10-20 Thread Richard Sandiford via Gcc-patches
Prathamesh Kulkarni  writes:
> On Tue, 19 Oct 2021 at 19:58, Richard Sandiford
>  wrote:
>>
>> Prathamesh Kulkarni  writes:
>> > Hi,
>> > The attached patch emits a more verbose diagnostic for target attribute 
>> > that
>> > is an architecture extension needing a leading '+'.
>> >
>> > For the following test,
>> > void calculate(void) __attribute__ ((__target__ ("sve")));
>> >
>> > With patch, the compiler now emits:
>> > 102376.c:1:1: error: arch extension ‘sve’ should be prepended with ‘+’
>> > 1 | void calculate(void) __attribute__ ((__target__ ("sve")));
>> >   | ^~~~
>> >
>> > instead of:
>> > 102376.c:1:1: error: pragma or attribute ‘target("sve")’ is not valid
>> > 1 | void calculate(void) __attribute__ ((__target__ ("sve")));
>> >   | ^~~~
>>
>> Nice :-)
>>
>> > (This isn't specific to sve though).
>> > OK to commit after bootstrap+test ?
>> >
>> > Thanks,
>> > Prathamesh
>> >
>> > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
>> > index a9a1800af53..975f7faf968 100644
>> > --- a/gcc/config/aarch64/aarch64.c
>> > +++ b/gcc/config/aarch64/aarch64.c
>> > @@ -17821,7 +17821,16 @@ aarch64_process_target_attr (tree args)
>> >num_attrs++;
>> >if (!aarch64_process_one_target_attr (token))
>> >   {
>> > -   error ("pragma or attribute % is not valid", 
>> > token);
>> > +   /* Check if token is possibly an arch extension without
>> > +  leading '+'.  */
>> > +   char *str = (char *) xmalloc (strlen (token) + 2);
>> > +   str[0] = '+';
>> > +   strcpy(str + 1, token);
>>
>> I think std::string would be better here, e.g.:
>>
>>   auto with_plus = std::string ("+") + token;
>>
>> > +   if (aarch64_handle_attr_isa_flags (str))
>> > + error("arch extension %<%s%> should be prepended with %<+%>", 
>> > token);
>>
>> Nit: should be a space before the “(”.
>>
>> In principle, a fixit hint would have been nice here, but I don't think
>> we have enough information to provide one.  (Just saying for the record.)
> Thanks for the suggestions.
> Does the attached patch look OK ?

Looks good apart from a couple of formatting nits.
>
> Thanks,
> Prathamesh
>>
>> Thanks,
>> Richard
>>
>> > +   else
>> > + error ("pragma or attribute % is not valid", 
>> > token);
>> > +   free (str);
>> > return false;
>> >   }
>> >
>
> [aarch64] PR102376 - Emit better diagnostics for arch extension in target 
> attribute.
>
> gcc/ChangeLog:
>   PR target/102376
>   * config/aarch64/aarch64.c (aarch64_handle_attr_isa_flags): Change str's
>   type to const char *.
>   (aarch64_process_target_attr): Check if token is possibly an arch 
> extension
>   without leading '+' and emit diagnostic accordingly.
>
> gcc/testsuite/ChangeLog:
>   PR target/102376
>   * gcc.target/aarch64/pr102376.c: New test.
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index a9a1800af53..b72079bc466 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -17548,7 +17548,7 @@ aarch64_handle_attr_tune (const char *str)
> modified.  */
>  
>  static bool
> -aarch64_handle_attr_isa_flags (char *str)
> +aarch64_handle_attr_isa_flags (const char *str)
>  {
>enum aarch64_parse_opt_result parse_res;
>uint64_t isa_flags = aarch64_isa_flags;
> @@ -17821,7 +17821,13 @@ aarch64_process_target_attr (tree args)
>num_attrs++;
>if (!aarch64_process_one_target_attr (token))
>   {
> -   error ("pragma or attribute % is not valid", token);
> +   /* Check if token is possibly an arch extension without
> +  leading '+'.  */
> +   auto with_plus = std::string("+") + token;

Should be a space before “(”.

> +   if (aarch64_handle_attr_isa_flags (with_plus.c_str ()))
> + error ("arch extension %<%s%> should be prepended with %<+%>", 
> token);

Long line, should be:

error ("arch extension %<%s%> should be prepended with %<+%>",
   token);

OK with those changes, thanks.

Richard


> +   else
> + error ("pragma or attribute % is not valid", 
> token);
> return false;
>   }
>  
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr102376.c 
> b/gcc/testsuite/gcc.target/aarch64/pr102376.c
> new file mode 100644
> index 000..efd15f6ca9b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr102376.c
> @@ -0,0 +1,3 @@
> +/* { dg-do compile } */
> +
> +void calculate(void) __attribute__ ((__target__ ("sve"))); /* { dg-error 
> "arch extension 'sve' should be prepended with '\\+'" } */


PING^1 [PATCH] rs6000: Remove builtin mask check from builtin_decl [PR102347]

2021-10-20 Thread Kewen.Lin via Gcc-patches
Hi,

As the discussions and the testing result under the main thread, this
patch would be safe.

Ping for this:

https://gcc.gnu.org/pipermail/gcc-patches/2021-September/580357.html

BR,
Kewen

on 2021/9/28 下午4:13, Kewen.Lin via Gcc-patches wrote:
> Hi,
> 
> As the discussion in PR102347, currently builtin_decl is invoked so
> early, it's when making up the function_decl for builtin functions,
> at that time the rs6000_builtin_mask could be wrong for those
> builtins sitting in #pragma/attribute target functions, though it
> will be updated properly later when LTO processes all nodes.
> 
> This patch is to align with the practice i386 port adopts, also
> align with r10-7462 by relaxing builtin mask checking in some places.
> 
> Bootstrapped and regress-tested on powerpc64le-linux-gnu P9 and
> powerpc64-linux-gnu P8.
> 
> Is it ok for trunk?
> 
> BR,
> Kewen
> -
> gcc/ChangeLog:
> 
>   PR target/102347
>   * config/rs6000/rs6000-call.c (rs6000_builtin_decl): Remove builtin
>   mask check.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR target/102347
>   * gcc.target/powerpc/pr102347.c: New test.
> 
> ---
>  gcc/config/rs6000/rs6000-call.c | 14 --
>  gcc/testsuite/gcc.target/powerpc/pr102347.c | 15 +++
>  2 files changed, 19 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr102347.c
> 
> diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
> index fd7f24da818..15e0e09c07d 100644
> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -13775,23 +13775,17 @@ rs6000_init_builtins (void)
>  }
>  }
> 
> -/* Returns the rs6000 builtin decl for CODE.  */
> +/* Returns the rs6000 builtin decl for CODE.  Note that we don't check
> +   the builtin mask here since there could be some #pragma/attribute
> +   target functions and the rs6000_builtin_mask could be wrong when
> +   this checking happens, though it will be updated properly later.  */
> 
>  tree
>  rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
>  {
> -  HOST_WIDE_INT fnmask;
> -
>if (code >= RS6000_BUILTIN_COUNT)
>  return error_mark_node;
> 
> -  fnmask = rs6000_builtin_info[code].mask;
> -  if ((fnmask & rs6000_builtin_mask) != fnmask)
> -{
> -  rs6000_invalid_builtin ((enum rs6000_builtins)code);
> -  return error_mark_node;
> -}
> -
>return rs6000_builtin_decls[code];
>  }
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr102347.c 
> b/gcc/testsuite/gcc.target/powerpc/pr102347.c
> new file mode 100644
> index 000..05c439a8dac
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr102347.c
> @@ -0,0 +1,15 @@
> +/* { dg-do link } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-require-effective-target lto } */
> +/* { dg-options "-flto -mdejagnu-cpu=power9" } */
> +
> +/* Verify there are no error messages in LTO mode.  */
> +
> +#pragma GCC target "cpu=power10"
> +int main ()
> +{
> +  float *b;
> +  __vector_quad c;
> +  __builtin_mma_disassemble_acc (b, &c);
> +  return 0;
> +}
> --
> 2.27.0
> 



Re: [Patch][GCC][middle-end] - Lower store and load neon builtins to gimple

2021-10-20 Thread Andre Vieira (lists) via Gcc-patches

On 27/09/2021 12:54, Richard Biener via Gcc-patches wrote:

On Mon, 27 Sep 2021, Jirui Wu wrote:


Hi all,

I now use the type based on the specification of the intrinsic
instead of type based on formal argument.

I use signed Int vector types because the outputs of the neon builtins
that I am lowering is always signed. In addition, fcode and stmt
does not have information on whether the result is signed.

Because I am replacing the stmt with new_stmt,
a VIEW_CONVERT_EXPR cast is already in the code if needed.
As a result, the result assembly code is correct.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master? If OK can it be committed for me, I have no commit rights.

+   tree temp_lhs = gimple_call_lhs (stmt);
+   aarch64_simd_type_info simd_type
+ = aarch64_simd_types[mem_type];
+   tree elt_ptr_type = build_pointer_type (simd_type.eltype);
+   tree zero = build_zero_cst (elt_ptr_type);
+   gimple_seq stmts = NULL;
+   tree base = gimple_convert (&stmts, elt_ptr_type,
+   args[0]);
+   new_stmt = gimple_build_assign (temp_lhs,
+fold_build2 (MEM_REF,
+TREE_TYPE (temp_lhs),
+base,
+zero));

this now uses the alignment info as on the LHS of the call by using
TREE_TYPE (temp_lhs) as type of the MEM_REF.  So for example

  typedef int foo __attribute__((vector_size(N),aligned(256)));

  foo tem = ld1 (ptr);

will now access *ptr as if it were aligned to 256 bytes.  But I'm sure
the ld1 intrinsic documents the required alignment (either it's the
natural alignment of the vector type loaded or element alignment?).

For element alignment you'd do sth like

   tree access_type = build_aligned_type (vector_type, TYPE_ALIGN
(TREE_TYPE (vector_type)));

for example.

Richard.

Hi,

I'm taking over this patch from Jirui.

I've decided to use the vector type stored in aarch64_simd_type_info, 
since that should always have the correct alignment.


To be fair though, I do wonder whether this is actually needed as is 
right now, since the way we cast the inputs and outputs of these 
__builtins in arm_neon.h prevents these issues I think, but it is more 
future proof. Also you could argue people could use the __builtins 
directly, though I'd think that would be at their own risk.


Is this OK?

Kind regards,
Andrediff --git a/gcc/config/aarch64/aarch64-builtins.c 
b/gcc/config/aarch64/aarch64-builtins.c
index 
1a507ea59142d0b5977b0167abfe9a58a567adf7..a815e4cfbccab692ca688ba87c71b06c304abbfb
 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -46,6 +46,7 @@
 #include "emit-rtl.h"
 #include "stringpool.h"
 #include "attribs.h"
+#include "gimple-fold.h"
 
 #define v8qi_UP  E_V8QImode
 #define v4hi_UP  E_V4HImode
@@ -2399,11 +2400,65 @@ aarch64_general_fold_builtin (unsigned int fcode, tree 
type,
   return NULL_TREE;
 }
 
+enum aarch64_simd_type
+get_mem_type_for_load_store (unsigned int fcode)
+{
+  switch (fcode)
+  {
+VAR1 (LOAD1, ld1 , 0, LOAD, v8qi)
+VAR1 (STORE1, st1 , 0, STORE, v8qi)
+  return Int8x8_t;
+VAR1 (LOAD1, ld1 , 0, LOAD, v16qi)
+VAR1 (STORE1, st1 , 0, STORE, v16qi)
+  return Int8x16_t;
+VAR1 (LOAD1, ld1 , 0, LOAD, v4hi)
+VAR1 (STORE1, st1 , 0, STORE, v4hi)
+  return Int16x4_t;
+VAR1 (LOAD1, ld1 , 0, LOAD, v8hi)
+VAR1 (STORE1, st1 , 0, STORE, v8hi)
+  return Int16x8_t;
+VAR1 (LOAD1, ld1 , 0, LOAD, v2si)
+VAR1 (STORE1, st1 , 0, STORE, v2si)
+  return Int32x2_t;
+VAR1 (LOAD1, ld1 , 0, LOAD, v4si)
+VAR1 (STORE1, st1 , 0, STORE, v4si)
+  return Int32x4_t;
+VAR1 (LOAD1, ld1 , 0, LOAD, v2di)
+VAR1 (STORE1, st1 , 0, STORE, v2di)
+  return Int64x2_t;
+VAR1 (LOAD1, ld1 , 0, LOAD, v4hf)
+VAR1 (STORE1, st1 , 0, STORE, v4hf)
+  return Float16x4_t;
+VAR1 (LOAD1, ld1 , 0, LOAD, v8hf)
+VAR1 (STORE1, st1 , 0, STORE, v8hf)
+  return Float16x8_t;
+VAR1 (LOAD1, ld1 , 0, LOAD, v4bf)
+VAR1 (STORE1, st1 , 0, STORE, v4bf)
+  return Bfloat16x4_t;
+VAR1 (LOAD1, ld1 , 0, LOAD, v8bf)
+VAR1 (STORE1, st1 , 0, STORE, v8bf)
+  return Bfloat16x8_t;
+VAR1 (LOAD1, ld1 , 0, LOAD, v2sf)
+VAR1 (STORE1, st1 , 0, STORE, v2sf)
+  return Float32x2_t;
+VAR1 (LOAD1, ld1 , 0, LOAD, v4sf)
+VAR1 (STORE1, st1 , 0, STORE, v4sf)
+  return Float32x4_t;
+VAR1 (LOAD1, ld1 , 0, LOAD, v2df)
+VAR1 (STORE1, st1 , 0, STORE, v2df)
+  return Float64x2_t;
+default:
+  gcc_unreachable ();
+  break;
+  }
+}
+
 /* Try to fold STMT, given that it's a call to the built-in function with
subcode FCODE.  Return the new statement on success and null on
failure.  */
 gimple *
-aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt)
+aarch64_general_gimple_fol

Re: FW: [PING] Re: [Patch][GCC][middle-end] - Generate FRINTZ for (double)(int) under -ffast-math on aarch64

2021-10-20 Thread Andre Vieira (lists) via Gcc-patches


On 19/10/2021 00:22, Joseph Myers wrote:

On Fri, 15 Oct 2021, Richard Biener via Gcc-patches wrote:


On Fri, Sep 24, 2021 at 2:59 PM Jirui Wu via Gcc-patches
 wrote:

Hi,

Ping: https://gcc.gnu.org/pipermail/gcc-patches/2021-August/577846.html

The patch is attached as text for ease of use. Is there anything that needs to 
change?

Ok for master? If OK, can it be committed for me, I have no commit rights.

I'm still not sure about the correctness.  I suppose the
flag_fp_int_builtin_inexact && !flag_trapping_math is supposed to guard
against spurious inexact exceptions, shouldn't that be
!flag_fp_int_builtin_inexact || !flag_trapping_math instead?

The following remarks may be relevant here, but are not intended as an
assertion of what is correct in this case.

1. flag_fp_int_builtin_inexact is the more permissive case ("inexact" may
or may not be raised).  All existing uses in back ends are
"flag_fp_int_builtin_inexact || !flag_trapping_math" or equivalent.

2. flag_fp_int_builtin_inexact only applies to certain built-in functions
(as listed in invoke.texi).  It's always unspecified, even in C2X, whether
casts of non-integer values from floating-point to integer types raise
"inexact".  So flag_fp_int_builtin_inexact should not be checked in insn
patterns corresponding to simple casts from floating-point to integer,
only in insn patterns corresponding to the built-in functions listed for
-fno-fp-int-builtin-inexact in invoke.texi (or for operations that combine
such a built-in function with a cast of the *result* to integer type).

Hi,

I agree with Joseph, I don't think we should be checking 
flag_fp_int_builtin_inexact here because we aren't transforming the math 
function 'trunc', but rather a piece of C-code that has trunc-like 
semantics.


As for flag_trapping_math, it's definition says 'Assume floating point 
operations can trap'. I assume IFN_TRUNC would not trap, since I don't 
think IFN_TRUNC will preserve the overflow behaviour, in the cases where 
the FP value is bigger than the intermediate integer type range. So I 
think we should prevent the transformation if we are assuming the FP 
instructions can trap.


If we don't assume the FP instructions can trap, then I think it's fine 
to ignore the overflow as this behavior is undefined in C.


Also changed the comment. Slightly different to your suggestion Richard, 
in an attempt to be more generic. Do you still have concerns regarding 
the checks?


Kind regards,
Andrediff --git a/gcc/match.pd b/gcc/match.pd
index 
3ff15bc0de5aba45ade94ca6e47e01fad9a2a314..5bed2e12715ea213813ef8b84fd420475b04d201
 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3606,6 +3606,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 >= inside_prec - !inside_unsignedp)
  (convert @0)))
 
+/* (float_type)(integer_type) x -> trunc (x) if the type of x matches
+   float_type.  Only do the transformation if we do not need to preserve
+   trapping behaviour, so require !flag_trapping_math. */
+#if GIMPLE
+(simplify
+   (float (fix_trunc @0))
+   (if (!flag_trapping_math
+   && types_match (type, TREE_TYPE (@0))
+   && direct_internal_fn_supported_p (IFN_TRUNC, type,
+ OPTIMIZE_FOR_BOTH))
+  (IFN_TRUNC @0)))
+#endif
+
 /* If we have a narrowing conversion to an integral type that is fed by a
BIT_AND_EXPR, we might be able to remove the BIT_AND_EXPR if it merely
masks off bits outside the final type (and nothing else).  */
diff --git a/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c 
b/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c
new file mode 100644
index 
..07217064e2ba54fcf4f5edc440e6ec19ddae66e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math" } */
+
+float
+f1 (float x)
+{
+  int y = x;
+
+  return (float) y;
+}
+
+double
+f2 (double x)
+{
+  long y = x;
+
+  return (double) y;
+}
+
+float
+f3 (double x)
+{
+  int y = x;
+
+  return (float) y;
+}
+
+double
+f4 (float x)
+{
+  int y = x;
+
+  return (double) y;
+}
+
+/* { dg-final { scan-assembler "frintz\\ts\[0-9\]+, s\[0-9\]+" } } */
+/* { dg-final { scan-assembler "frintz\\td\[0-9\]+, d\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtzs\\tw\[0-9\]+, d\[0-9\]+" } } */
+/* { dg-final { scan-assembler "scvtf\\ts\[0-9\]+, w\[0-9\]+" } } */
+/* { dg-final { scan-assembler "fcvtzs\\tw\[0-9\]+, s\[0-9\]+" } } */
+/* { dg-final { scan-assembler "scvtf\\td\[0-9\]+, w\[0-9\]+" } } */


Re: [Patch][GCC][middle-end] - Lower store and load neon builtins to gimple

2021-10-20 Thread Richard Biener via Gcc-patches
On Wed, 20 Oct 2021, Andre Vieira (lists) wrote:

> On 27/09/2021 12:54, Richard Biener via Gcc-patches wrote:
> > On Mon, 27 Sep 2021, Jirui Wu wrote:
> >
> >> Hi all,
> >>
> >> I now use the type based on the specification of the intrinsic
> >> instead of type based on formal argument.
> >>
> >> I use signed Int vector types because the outputs of the neon builtins
> >> that I am lowering is always signed. In addition, fcode and stmt
> >> does not have information on whether the result is signed.
> >>
> >> Because I am replacing the stmt with new_stmt,
> >> a VIEW_CONVERT_EXPR cast is already in the code if needed.
> >> As a result, the result assembly code is correct.
> >>
> >> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> >>
> >> Ok for master? If OK can it be committed for me, I have no commit rights.
> > +   tree temp_lhs = gimple_call_lhs (stmt);
> > +   aarch64_simd_type_info simd_type
> > + = aarch64_simd_types[mem_type];
> > +   tree elt_ptr_type = build_pointer_type (simd_type.eltype);
> > +   tree zero = build_zero_cst (elt_ptr_type);
> > +   gimple_seq stmts = NULL;
> > +   tree base = gimple_convert (&stmts, elt_ptr_type,
> > +   args[0]);
> > +   new_stmt = gimple_build_assign (temp_lhs,
> > +fold_build2 (MEM_REF,
> > +TREE_TYPE (temp_lhs),
> > +base,
> > +zero));
> >
> > this now uses the alignment info as on the LHS of the call by using
> > TREE_TYPE (temp_lhs) as type of the MEM_REF.  So for example
> >
> >   typedef int foo __attribute__((vector_size(N),aligned(256)));
> >
> >   foo tem = ld1 (ptr);
> >
> > will now access *ptr as if it were aligned to 256 bytes.  But I'm sure
> > the ld1 intrinsic documents the required alignment (either it's the
> > natural alignment of the vector type loaded or element alignment?).
> >
> > For element alignment you'd do sth like
> >
> >tree access_type = build_aligned_type (vector_type, TYPE_ALIGN
> > (TREE_TYPE (vector_type)));
> >
> > for example.
> >
> > Richard.
> Hi,
> 
> I'm taking over this patch from Jirui.
> 
> I've decided to use the vector type stored in aarch64_simd_type_info, since
> that should always have the correct alignment.
> 
> To be fair though, I do wonder whether this is actually needed as is right
> now, since the way we cast the inputs and outputs of these __builtins in
> arm_neon.h prevents these issues I think, but it is more future proof. Also
> you could argue people could use the __builtins directly, though I'd think
> that would be at their own risk.
> 
> Is this OK?

Yes, this variant looks OK.

> Kind regards,
> Andre
> 


Re: FW: [PING] Re: [Patch][GCC][middle-end] - Generate FRINTZ for (double)(int) under -ffast-math on aarch64

2021-10-20 Thread Richard Biener via Gcc-patches
On Wed, 20 Oct 2021, Andre Vieira (lists) wrote:

> 
> On 19/10/2021 00:22, Joseph Myers wrote:
> > On Fri, 15 Oct 2021, Richard Biener via Gcc-patches wrote:
> >
> >> On Fri, Sep 24, 2021 at 2:59 PM Jirui Wu via Gcc-patches
> >>  wrote:
> >>> Hi,
> >>>
> >>> Ping: https://gcc.gnu.org/pipermail/gcc-patches/2021-August/577846.html
> >>>
> >>> The patch is attached as text for ease of use. Is there anything that
> >>> needs to change?
> >>>
> >>> Ok for master? If OK, can it be committed for me, I have no commit rights.
> >> I'm still not sure about the correctness.  I suppose the
> >> flag_fp_int_builtin_inexact && !flag_trapping_math is supposed to guard
> >> against spurious inexact exceptions, shouldn't that be
> >> !flag_fp_int_builtin_inexact || !flag_trapping_math instead?
> > The following remarks may be relevant here, but are not intended as an
> > assertion of what is correct in this case.
> >
> > 1. flag_fp_int_builtin_inexact is the more permissive case ("inexact" may
> > or may not be raised).  All existing uses in back ends are
> > "flag_fp_int_builtin_inexact || !flag_trapping_math" or equivalent.
> >
> > 2. flag_fp_int_builtin_inexact only applies to certain built-in functions
> > (as listed in invoke.texi).  It's always unspecified, even in C2X, whether
> > casts of non-integer values from floating-point to integer types raise
> > "inexact".  So flag_fp_int_builtin_inexact should not be checked in insn
> > patterns corresponding to simple casts from floating-point to integer,
> > only in insn patterns corresponding to the built-in functions listed for
> > -fno-fp-int-builtin-inexact in invoke.texi (or for operations that combine
> > such a built-in function with a cast of the *result* to integer type).
> Hi,
> 
> I agree with Joseph, I don't think we should be checking
> flag_fp_int_builtin_inexact here because we aren't transforming the math
> function 'trunc', but rather a piece of C-code that has trunc-like semantics.

But we are generating 'trunc' which may now raise a spurious exception.
OTOH flag_fp_int_builtin_inexact wouldn't help here because
"may or may not" still may raise spurious exception flags.

> As for flag_trapping_math, it's definition says 'Assume floating point
> operations can trap'. I assume IFN_TRUNC would not trap, since I don't think
> IFN_TRUNC will preserve the overflow behaviour, in the cases where the FP
> value is bigger than the intermediate integer type range. So I think we should
> prevent the transformation if we are assuming the FP instructions can trap.

Note trap == set exception flags, not only raise a trap.

> If we don't assume the FP instructions can trap, then I think it's fine to
> ignore the overflow as this behavior is undefined in C.
> 
> Also changed the comment. Slightly different to your suggestion Richard, in an
> attempt to be more generic. Do you still have concerns regarding the checks?

I think your updated patch is OK.

Thanks,
Richard.


[COMMITTED] Remove superfluous debugging messages from the threading registry.

2021-10-20 Thread Aldy Hernandez via Gcc-patches
These are some random obvious cleanups to the threading dumps, since
it seems I'm not the only one looking at dumps these days.

The "just threaded" debugging message is redundant since there's
already an equivalent "Registering jump thread" message.

The "about to thread" message is actually confusing, because the source
block doesn't match the IL, since the CFG update is mid-flight.

Tested on x86-64 Linux.

gcc/ChangeLog:

* tree-ssa-threadupdate.c 
(back_jt_path_registry::adjust_paths_after_duplication):
Remove superfluous debugging message.
(back_jt_path_registry::duplicate_thread_path): Same.
---
 gcc/tree-ssa-threadupdate.c | 12 
 1 file changed, 12 deletions(-)

diff --git a/gcc/tree-ssa-threadupdate.c b/gcc/tree-ssa-threadupdate.c
index 293836cdc53..8e6f043bb44 100644
--- a/gcc/tree-ssa-threadupdate.c
+++ b/gcc/tree-ssa-threadupdate.c
@@ -2295,12 +2295,6 @@ back_jt_path_registry::adjust_paths_after_duplication 
(unsigned curr_path_num)
 {
   vec *curr_path = m_paths[curr_path_num];
 
-  if (dump_file && (dump_flags & TDF_DETAILS))
-{
-  fprintf (dump_file, "just threaded: ");
-  debug_path (dump_file, curr_path_num);
-}
-
   /* Iterate through all the other paths and adjust them.  */
   for (unsigned cand_path_num = 0; cand_path_num < m_paths.length (); )
 {
@@ -2409,12 +2403,6 @@ back_jt_path_registry::duplicate_thread_path (edge entry,
   if (!can_copy_bbs_p (region, n_region))
 return false;
 
-  if (dump_file && (dump_flags & TDF_DETAILS))
-{
-  fprintf (dump_file, "\nabout to thread: ");
-  debug_path (dump_file, current_path_no);
-}
-
   /* Some sanity checking.  Note that we do not check for all possible
  missuses of the functions.  I.e. if you ask to copy something weird,
  it will work, but the state of structures probably will not be
-- 
2.31.1



[COMMITTED] Remove unused back_threader destructor.

2021-10-20 Thread Aldy Hernandez via Gcc-patches
Tested on x86-64 Linux.

gcc/ChangeLog:

* tree-ssa-threadbackward.c (back_threader::~back_threader): Remove.
---
 gcc/tree-ssa-threadbackward.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/gcc/tree-ssa-threadbackward.c b/gcc/tree-ssa-threadbackward.c
index 96422a1390e..d94e3b962db 100644
--- a/gcc/tree-ssa-threadbackward.c
+++ b/gcc/tree-ssa-threadbackward.c
@@ -76,7 +76,6 @@ class back_threader
 {
 public:
   back_threader (bool speed_p, bool resolve);
-  ~back_threader ();
   void maybe_thread_block (basic_block bb);
   bool thread_through_all_blocks (bool may_peel_loop_headers);
 private:
@@ -128,10 +127,6 @@ back_threader::back_threader (bool speed_p, bool resolve)
   m_resolve = resolve;
 }
 
-back_threader::~back_threader ()
-{
-}
-
 // Register the current path for jump threading if it's profitable to
 // do so.
 //
-- 
2.31.1



[PATCH] Try to resolve paths in threader without looking further back.

2021-10-20 Thread Aldy Hernandez via Gcc-patches
Sometimes we can solve a candidate path without having to recurse
further back.  This can mostly happen in fully resolving mode, because
we can ask the ranger what the range on entry to the path is, but
there's no reason this can't always apply.  This one-liner removes
the fully-resolving restriction.

I'm tickled pink to see how many things we now get quite early
in the compilation.  I actually had to disable jump threading entirely
for a few tests because the early threader was catching things
disturbingly early.  Also, as Richi predicted, I saw a lot of pre-VRP
cleanups happening.

I was going to commit this as obvious, but I think the test changes
merit discussion.

We've been playing games with gcc.dg/tree-ssa/ssa-thread-11.c for quite
some time.  Every time a threading pass gets smarter, we push the
check further down the pipeline.  We've officially run out of dumb
threading passes to disable ;-).  In the last year we've gone up from a
handful of threads, to 34 threads with the current combination of
options.  I doubt this is testing anything useful any more, so I've
removed it.

Similarly for gcc.dg/tree-ssa/ssa-dom-thread-4.c.  We used to thread 3
jump threads, but they were disallowed because of loop rotation.  Then
we started catching more jump threads in VRP2 threading so we tested
there.  With this patch though, we triple the number of threads found
from 11 to 31.  I believe this test has outlived its usefulness, and
I've removed it.  Note that even though we have these outrageous
possibilities for this test, the block copier ultimately chops them
down (23 survive though).

Likewise for ssa-dom-thread-7.c.  The number of threads in this test has
been growing consistently over the years.  There's no way to test
what is possible, especially because improvements in one threader open
up possibilities for another.  With this patch we're up to 41 registered
jump threads and they're spread over 4 passes.  There's no way to get the
amount right, and this test has become a source of useless busywork.

All in all, I believe the simpler jump threading tests, as well as the
gimple FE tests I've added, more than adequately cover us.

Tested on x86-64 Linux.

OK for trunk?

p.s. As usual, some warning pass gets thrown off.  Martin, I've XFAILed
it.

gcc/ChangeLog:

* tree-ssa-threadbackward.c (back_threader::find_paths_to_names):
Always try to resolve path without looking back.

gcc/testsuite/ChangeLog:

* gcc.dg/graphite/scop-dsyr2k-2.c: Adjust for jump threading changes.
* gcc.dg/graphite/scop-dsyr2k.c: Same.
* gcc.dg/graphite/scop-dsyrk-2.c: Same.
* gcc.dg/graphite/scop-dsyrk.c: Same.
* gcc.dg/tree-ssa/pr20701.c: Same.
* gcc.dg/tree-ssa/pr20702.c: Same.
* gcc.dg/tree-ssa/pr21086.c: Same.
* gcc.dg/tree-ssa/pr25382.c: Same.
* gcc.dg/tree-ssa/pr58480.c: Same.
* gcc.dg/tree-ssa/ssa-vrp-thread-1.c: Same.
* gcc.dg/tree-ssa/vrp08.c: Same.
* gcc.dg/tree-ssa/vrp55.c: Same.
* gcc.dg/tree-ssa/ssa-dom-thread-4.c: Removed.
* gcc.dg/tree-ssa/ssa-dom-thread-7.c: Removed.
* gcc.dg/tree-ssa/ssa-thread-11.c: Removed.
* gcc.dg/uninit-pr89230-1.c: xfail.
---
 gcc/testsuite/gcc.dg/graphite/scop-dsyr2k-2.c |   1 +
 gcc/testsuite/gcc.dg/graphite/scop-dsyr2k.c   |   1 +
 gcc/testsuite/gcc.dg/graphite/scop-dsyrk-2.c  |   1 +
 gcc/testsuite/gcc.dg/graphite/scop-dsyrk.c|   1 +
 gcc/testsuite/gcc.dg/tree-ssa/pr20701.c   |   2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr20702.c   |   2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr21086.c   |   2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr25382.c   |   2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr58480.c   |   2 +-
 .../gcc.dg/tree-ssa/ssa-dom-thread-4.c|  60 
 .../gcc.dg/tree-ssa/ssa-dom-thread-7.c| 134 --
 gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-11.c |  50 ---
 .../gcc.dg/tree-ssa/ssa-vrp-thread-1.c|   4 +-
 gcc/testsuite/gcc.dg/tree-ssa/vrp08.c |   2 +-
 gcc/testsuite/gcc.dg/tree-ssa/vrp55.c |   6 +-
 gcc/testsuite/gcc.dg/uninit-pr89230-1.c   |   3 +-
 gcc/tree-ssa-threadbackward.c |   4 +-
 17 files changed, 19 insertions(+), 258 deletions(-)
 delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-4.c
 delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-thread-7.c
 delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-11.c

diff --git a/gcc/testsuite/gcc.dg/graphite/scop-dsyr2k-2.c 
b/gcc/testsuite/gcc.dg/graphite/scop-dsyr2k-2.c
index 06aa19a8577..42e23fc157e 100644
--- a/gcc/testsuite/gcc.dg/graphite/scop-dsyr2k-2.c
+++ b/gcc/testsuite/gcc.dg/graphite/scop-dsyr2k-2.c
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target size32plus } */
+/* { dg-additional-options "-fno-thread-jumps" } */
 #define NMAX 3000
 
 static double a[NMAX][NMAX], b[NMAX][NMAX], c[NMAX][NMAX];
diff --git a/gcc/testsuite/gcc.dg/graphite/scop-dsyr2k.c 
b/gcc/testsuite/g

Re: [PATCH] X86: Add an option -muse-unaligned-vector-move

2021-10-20 Thread Xu Dianhong via Gcc-patches
Many thanks for your explanation. I got the meaning of operands.
The "addpd b(%rip), %xmm0" instruction needs "b(%rip)" aligned otherwise
it will raise a "Real-Address Mode Exceptions".
I haven't considered this situation  "b(%rip)" has an address dependence of
"a(%rip)" before. I think this situation could be resolved on the assembler
side except for this dummy code like "movapd 0x200b37(%rip),%xmm1, ...
addpd  0x200b37(%rip),%xmm0 ".

On Wed, Oct 20, 2021 at 4:06 PM Richard Biener 
wrote:

> On Wed, Oct 20, 2021 at 9:48 AM Xu Dianhong  wrote:
> >
> > Thanks for the comments.
> >
> > > And does it even work?
> > It works, I checked it in the test case, and when using this option, it
> can emit an unaligned vector move.
> > >I fail to see adjustments to memory operands of
> > SSE/AVX instructions that have to be aligned
> > I changed all vector move in "get_ssemov" without checking the move with
> memory operands or not.
> > >and now would need to be
> > pushed to separate unaligned moves with an extra register?
> > I think it did not use an extra register. I'm not sure if I got your
> question, and this patch just change the final operator of SSE MOVE from
> aligned operator to unaligned operator, and I did not change the operands.
>
> For example
>
> typedef double v2df __attribute__((vector_size(16)));
>
> v2df a, b;
>
> void foo ()
> {
>   a += b;
> }
>
> will compile to
>
> foo:
> .LFB0:
> .cfi_startproc
> movapd  a(%rip), %xmm0
> addpd   b(%rip), %xmm0
> movaps  %xmm0, a(%rip)
> ret
>
> what should -muse-unaligned-vector-move do here?  The addpd b(%rip), %xmm0
> instruction implies an aligned move from b(%rip).
>
> It looks your patch could be better implemented in the assembler, just
> using
> the unaligned encodings for aligned moves?
>
> Richard.
>
> > On Wed, Oct 20, 2021 at 3:04 PM Richard Biener <
> richard.guent...@gmail.com> wrote:
> >>
> >> On Wed, Oct 20, 2021 at 9:02 AM Richard Biener
> >>  wrote:
> >> >
> >> > On Wed, Oct 20, 2021 at 7:31 AM dianhong.xu--- via Gcc-patches
> >> >  wrote:
> >> > >
> >> > > From: dianhong xu 
> >> > >
> >> > > Add -muse-unaligned-vector-move option to emit unaligned vector move
> >> > > instaructions.
> >> >
> >> > Why would you ever want to have such option?!  Should the
> documentation
> >> > at least read "emit unaligned vector moves even for aligned storage
> or when
> >> > using aligned move intrinsics"?
> >>
> >> And does it even work?  I fail to see adjustments to memory operands of
> >> SSE/AVX instructions that have to be aligned and now would need to be
> >> pushed to separate unaligned moves with an extra register?
> >>
> >> Richard.
> >>
> >> >
> >> > Richard.
> >> >
> >> > > gcc/ChangeLog:
> >> > >
> >> > > * config/i386/i386-options.c (ix86_target_string): Add
> >> > > -muse-unaligned-vector-move.
> >> > > * config/i386/i386.c (ix86_get_ssemov): Emit unaligned
> vector if use
> >> > > the new option.
> >> > > * config/i386/i386.opt (muse-unaligned-vector-move): New.
> >> > > * config/i386/sse.md: Emit unaligned vector if use this new
> option
> >> > > * doc/invoke.texi: Document -muse-unaligned-vector-move
> >> > >
> >> > > gcc/testsuite/ChangeLog:
> >> > >
> >> > > * gcc.target/i386/avx2-vector-unaligned-load-store-1.c: New
> test.
> >> > > * gcc.target/i386/avx2-vector-unaligned-load-store-2.c: New
> test.
> >> > > * gcc.target/i386/avx2-vector-unaligned-load-store-3.c: New
> test.
> >> > > * gcc.target/i386/avx512vl-vector-unaligned-load-store-1.c:
> New test.
> >> > > ---
> >> > >  gcc/config/i386/i386-options.c|   3 +-
> >> > >  gcc/config/i386/i386.c|  41 +++
> >> > >  gcc/config/i386/i386.opt  |   4 +
> >> > >  gcc/config/i386/sse.md|  30 +++--
> >> > >  gcc/doc/invoke.texi   |   7 ++
> >> > >  .../i386/avx2-vector-unaligned-load-store-1.c | 102
> +
> >> > >  .../i386/avx2-vector-unaligned-load-store-2.c | 107
> ++
> >> > >  .../i386/avx2-vector-unaligned-load-store-3.c |  11 ++
> >> > >  .../avx512vl-vector-unaligned-load-store-1.c  |  13 +++
> >> > >  9 files changed, 287 insertions(+), 31 deletions(-)
> >> > >  create mode 100644
> gcc/testsuite/gcc.target/i386/avx2-vector-unaligned-load-store-1.c
> >> > >  create mode 100644
> gcc/testsuite/gcc.target/i386/avx2-vector-unaligned-load-store-2.c
> >> > >  create mode 100644
> gcc/testsuite/gcc.target/i386/avx2-vector-unaligned-load-store-3.c
> >> > >  create mode 100644
> gcc/testsuite/gcc.target/i386/avx512vl-vector-unaligned-load-store-1.c
> >> > >
> >> > > diff --git a/gcc/config/i386/i386-options.c
> b/gcc/config/i386/i386-options.c
> >> > > index c9523b26f49..eacbd0f5451 100644
> >> > > --- a/gcc/config/i386/i386-options.c
> >> > > +++ b/gcc/config/i386/i386-options.c
> >> > > @@ -397,7 +397,8 @@ ix86_target_string 

Re: [Patch] libgomp.texi: Update OMP_PLACES

2021-10-20 Thread Tobias Burnus

On 18.10.21 09:52, Jakub Jelinek wrote:


On Mon, Oct 18, 2021 at 09:22:51AM +0200, Tobias Burnus wrote:

This patch updates the OMP_PLACES description for the recent
OpenMP 5.1 changes.
I actually wonder when/whether the spec reference
should be updated to OpenMP 5.1 or an additional
reference to it should be added.


(That question is still open. I think we have the problem that only 4.5
is fully supported while 5.0+5.1 features are supported and documented
for some items.)


The first paragraph looks good, but I think the latter change only adds to
confusion that the following text already has.
Do you think you could try to reword it ... or should I?


I attached an updated version, but I am also not completely happy with
it. – Actually, the wording in the OpenMP spec is also not clear and
having an incomplete description in words plus a complex and fully
syntax as grammar (but without stating some details or only via an
example) is also not helpful.

As I am not completely happy with the attached patch, I like to leave
the rewording to you; that's unless you only have some minor suggestions.

Thanks,

Tobias
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
libgomp.texi: Update OMP_PLACES

libgomp/ChangeLog:

	* libgomp.texi (OMP_PLACES): Extend description for OMP 5.1 changes.

diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index e9fa8ba0bf7..aee82ef2ba2 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -2031,25 +2031,33 @@ When undefined, @env{OMP_PROC_BIND} defaults to @code{TRUE} when
 @table @asis
 @item @emph{Description}:
 The thread placement can be either specified using an abstract name or by an
-explicit list of the places.  The abstract names @code{threads}, @code{cores}
-and @code{sockets} can be optionally followed by a positive number in
-parentheses, which denotes the how many places shall be created.  With
-@code{threads} each place corresponds to a single hardware thread; @code{cores}
-to a single core with the corresponding number of hardware threads; and with
-@code{sockets} the place corresponds to a single socket.  The resulting
-placement can be shown by setting the @env{OMP_DISPLAY_ENV} environment
-variable.
+explicit list of the places.  The abstract names @code{threads}, @code{cores},
+@code{sockets}, @code{ll_caches} and @code{numa_domains} can be optionally
+followed by a positive number in parentheses, which denotes how many places
+shall be created.  With @code{threads} each place corresponds to a single
+hardware thread; @code{cores} to a single core with the corresponding number of
+hardware threads; with @code{sockets} the place corresponds to a single
+socket; with @code{ll_caches} to a set of cores that shares the last level
+cache on the device; and @code{numa_domains} to a set of cores for which their
+closest memory on the device is the same memory and at a similar distance from
+the cores.  The resulting placement can be shown by setting the
+@env{OMP_DISPLAY_ENV} environment variable.
 
 Alternatively, the placement can be specified explicitly as comma-separated
 list of places.  A place is specified by set of nonnegative numbers in curly
-braces, denoting the denoting the hardware threads.  The hardware threads
+braces, denoting the hardware threads.  (The curly braces can be
+omitted when only a single number has been specified.)  The hardware threads
 belonging to a place can either be specified as comma-separated list of
 nonnegative thread numbers or using an interval.  Multiple places can also be
 either specified by a comma-separated list of places or by an interval.  To
-specify an interval, a colon followed by the count is placed after after
+specify an interval, a colon followed by the count is placed after
 the hardware thread number or the place.  Optionally, the length can be
 followed by a colon and the stride number -- otherwise a unit stride is
-assumed.  For instance, the following specifies the same places list:
+assumed. Placing an exclamation mark (@code{!}) directly before a curly
+brace or numbers inside the curly braces (excluding intervals) will
+exclude those hardware threads.
+
+For instance, the following specifies the same places list:
 @code{"@{0,1,2@}, @{3,4,6@}, @{7,8,9@}, @{10,11,12@}"};
 @code{"@{0:3@}, @{3:3@}, @{7:3@}, @{10:3@}"}; and @code{"@{0:2@}:4:3"}.
 


Re: [PATCH] X86: Add an option -muse-unaligned-vector-move

2021-10-20 Thread Richard Biener via Gcc-patches
On Wed, Oct 20, 2021 at 12:40 PM Xu Dianhong  wrote:
>
> Many thanks for your explanation. I got the meaning of operands.
> The "addpd b(%rip), %xmm0" instruction needs "b(%rip)" aligned otherwise it 
> will rise a "Real-Address Mode Exceptions".
> I haven't considered this situation  "b(%rip)" has an address dependence of 
> "a(%rip)" before. I think this situation could be resolved on the assembler 
> side except for this dummy code like "movapd 0x200b37(%rip),%xmm1, ... addpd  
> 0x200b37(%rip),%xmm0 ".

Of course the compiler will only emit instructions which have the
constraint of aligned memory
when the memory is known to be aligned.  That's why I wonder why you
would need such
option.  "Real-Address Mode Exceptions" may point to the issue, but I
wonder what's different
in real mode vs. protected mode - even with segmentation the alignment
of objects should
prevail unless you play linker"tricks" that make global objects have
different alignment - but
then it's better to adjust the respective hooks to not falsely claim
such alignment.  Consider
for example

   if ((uintptr_t)&a & 0x7)
 foo();
  else
 bar();

GCC will optimize the branch statically to always call foo if 'a'
appears to be aligned,
even if you later try to "override" this with an option.  Alignment is
not only about
moves, it's also about knowledge about low bits in addresses and about
alias analysis where alignment constrains how two objects can overlap.

So - do not lie to the compiler!  A late "workaround" avoiding aligned
SSE moves isn't a proper fix.

Richard.

> On Wed, Oct 20, 2021 at 4:06 PM Richard Biener  
> wrote:
>>
>> On Wed, Oct 20, 2021 at 9:48 AM Xu Dianhong  wrote:
>> >
>> > Thanks for the comments.
>> >
>> > > And does it even work?
>> > It works, I checked it in the test case, and when using this option, it 
>> > can emit an unaligned vector move.
>> > >I fail to see adjustments to memory operands of
>> > SSE/AVX instructions that have to be aligned
>> > I changed all vector move in "get_ssemov" without checking the move with 
>> > memory operands or not.
>> > >and now would need to be
>> > pushed to separate unaligned moves with an extra register?
>> > I think it did not use an extra register. I'm not sure if I got your 
>> > question, and this patch just change the final operator of SSE MOVE from 
>> > aligned operator to unaligned operator, and I did not change the operands.
>>
>> For example
>>
>> typedef double v2df __attribute__((vector_size(16)));
>>
>> v2df a, b;
>>
>> void foo ()
>> {
>>   a += b;
>> }
>>
>> will compile to
>>
>> foo:
>> .LFB0:
>> .cfi_startproc
>> movapd  a(%rip), %xmm0
>> addpd   b(%rip), %xmm0
>> movaps  %xmm0, a(%rip)
>> ret
>>
>> what should -muse-unaligned-vector-move do here?  The addpd b(%rip), %xmm0
>> instruction implies an aligned move from b(%rip).
>>
>> It looks your patch could be better implemented in the assembler, just using
>> the unaligned encodings for aligned moves?
>>
>> Richard.
>>
>> > On Wed, Oct 20, 2021 at 3:04 PM Richard Biener 
>> >  wrote:
>> >>
>> >> On Wed, Oct 20, 2021 at 9:02 AM Richard Biener
>> >>  wrote:
>> >> >
>> >> > On Wed, Oct 20, 2021 at 7:31 AM dianhong.xu--- via Gcc-patches
>> >> >  wrote:
>> >> > >
>> >> > > From: dianhong xu 
>> >> > >
>> >> > > Add -muse-unaligned-vector-move option to emit unaligned vector move
>> >> > > instaructions.
>> >> >
>> >> > Why would you ever want to have such option?!  Should the documentation
>> >> > at least read "emit unaligned vector moves even for aligned storage or 
>> >> > when
>> >> > using aligned move intrinsics"?
>> >>
>> >> And does it even work?  I fail to see adjustments to memory operands of
>> >> SSE/AVX instructions that have to be aligned and now would need to be
>> >> pushed to separate unaligned moves with an extra register?
>> >>
>> >> Richard.
>> >>
>> >> >
>> >> > Richard.
>> >> >
>> >> > > gcc/ChangeLog:
>> >> > >
>> >> > > * config/i386/i386-options.c (ix86_target_string): Add
>> >> > > -muse-unaligned-vector-move.
>> >> > > * config/i386/i386.c (ix86_get_ssemov): Emit unaligned vector 
>> >> > > if use
>> >> > > the new option.
>> >> > > * config/i386/i386.opt (muse-unaligned-vector-move): New.
>> >> > > * config/i386/sse.md: Emit unaligned vector if use this new 
>> >> > > option
>> >> > > * doc/invoke.texi: Document -muse-unaligned-vector-move
>> >> > >
>> >> > > gcc/testsuite/ChangeLog:
>> >> > >
>> >> > > * gcc.target/i386/avx2-vector-unaligned-load-store-1.c: New 
>> >> > > test.
>> >> > > * gcc.target/i386/avx2-vector-unaligned-load-store-2.c: New 
>> >> > > test.
>> >> > > * gcc.target/i386/avx2-vector-unaligned-load-store-3.c: New 
>> >> > > test.
>> >> > > * gcc.target/i386/avx512vl-vector-unaligned-load-store-1.c: 
>> >> > > New test.
>> >> > > ---
>> >> > >  gcc/config/i386/i386-options.c|   3 +-
>> >> > >  gcc/config/i386/i386.c  

Re: [PATCH] AArch64: Tune case-values-threshold

2021-10-20 Thread Richard Sandiford via Gcc-patches
Wilco Dijkstra  writes:
> Hi Richard,
>
>> The problem is that you're effectively asking for these values to be
>> taken on faith without providing any analysis and without describing
>> how you arrived at the new numbers.  Did you try other values too?
>> If so, how did they compare with the numbers that you finally chose?
>> At least that would give an indication of where the boundaries are.
>
> Yes, I obviously tried other values, pretty much all in range 1-20. There is
> generally a range of 4-5 values that are very similar in size, and then you
> choose one in the middle which also looks good for performance.
>
>> For example, it's easier to believe that 8 is the right value for -Os if
>> you say that you tried 9 and 7 as well, and they were worse than 8 by X%
>> and Y%.  This would also help anyone who wants to tweak the numbers
>> again in future.
>
> For -Os, the size range for values 6-10 is within 0.01% so they are virtually
> identical and I picked the median. Whether this will remain best in the future
> is unclear since it depends on so many things, so at some point it needs
> to be looked at again, just like most other tunings.

Thanks.  These details are useful.  For example, if someone finds
a compelling reason to bump the new values by +/-2 (to help with a
particular test case) then it sounds we should accept that, since it
wouldn't conflict with your work.

So the patch is OK, thanks.

(FWIW, I tried building a linux kernel I had lying around at -Os,
which also showed an improvement of ~0.07%.)

Richard


Re: [PATCH] Adjust testcase for O2 vectorization.

2021-10-20 Thread Christophe Lyon via Gcc-patches
Hi,


On Tue, Oct 19, 2021 at 11:03 AM liuhongt via Gcc-patches <
gcc-patches@gcc.gnu.org> wrote:

> updated patch:
>   1. Add documents in doc/sourcebuild.texi (Effective-Target Keywords).
>   2. Reduce -novec.c testcases to contain only new failed parted which
> is caused by O2 vectorization.
>   3. Add PR in dg-warning comment.
>
> As discussed in [1], this patch add xfail/target selector to those
> testcases, also make a copy of them so that they can be tested w/o
> vectorization.
>
> Newly added xfail/target selectors are used to check the vectorization
> capability of continuous byte/double bytes storage, these scenarios
> are exactly the part of the testcases that regressed after O2
> vectorization.
>
> [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-October/581456.html.
>
> gcc/ChangeLog
>
> * doc/sourcebuild.texi (Effective-Target Keywords): Document
> vect_slp_v2qi_store, vect_slp_v4qi_store, vect_slp_v8qi_store,
> vect_slp_v16qi_store, vect_slp_v2hi_store,
> vect_slp_v4hi_store, vect_slp_v2si_store, vect_slp_v4si_store.
>
> gcc/testsuite/ChangeLog
>
> PR middle-end/102722
> PR middle-end/102697
> PR middle-end/102462
> PR middle-end/102706
> PR middle-end/102744
> * c-c++-common/Wstringop-overflow-2.c: Adjust testcase with new
> xfail/target selector.
> * gcc.dg/Warray-bounds-51.c: Ditto.
> * gcc.dg/Warray-parameter-3.c: Ditto.
> * gcc.dg/Wstringop-overflow-14.c: Ditto.
> * gcc.dg/Wstringop-overflow-21.c: Ditto.
> * gcc.dg/Wstringop-overflow-68.c: Ditto.
> * gcc.dg/Wstringop-overflow-76.c: Ditto.
> * gcc.dg/Warray-bounds-48.c: Ditto.
> * gcc.dg/Wzero-length-array-bounds-2.c: Ditto.
>

Some of these adjustments cause regressions on arm / aarch64, the exact
list depends on the target/flags.
See
https://people.linaro.org/~christophe.lyon/cross-validation/gcc/trunk/r12-4525-gf36240f8c835d792f788b6724e272fc0a4a4f26f/report-build-info.html
for more on details on several combinations.

Can you have a look?

Christophe

* lib/target-supports.exp (check_vect_slp_aligned_store_usage):
> New function.
> (check_effective_target_vect_slp_v2qi_store): Ditto.
> (check_effective_target_vect_slp_v4qi_store): Ditto.
> (check_effective_target_vect_slp_v8qi_store): Ditto.
> (check_effective_target_vect_slp_v16qi_store): Ditto.
> (check_effective_target_vect_slp_v2hi_store): Ditto.
> (check_effective_target_vect_slp_v4hi_store): Ditto.
> (check_effective_target_vect_slp_v2si_store): Ditto.
> (check_effective_target_vect_slp_v4si_store): Ditto.
> * c-c++-common/Wstringop-overflow-2-novec.c: New test.
> * gcc.dg/Warray-bounds-51-novec.c: New test.
> * gcc.dg/Warray-bounds-48-novec.c: New test.
> * gcc.dg/Warray-parameter-3-novec.c: New test.
> * gcc.dg/Wstringop-overflow-14-novec.c: New test.
> * gcc.dg/Wstringop-overflow-21-novec.c: New test.
> * gcc.dg/Wstringop-overflow-76-novec.c: New test.
> * gcc.dg/Wzero-length-array-bounds-2-novec.c: New test.
> ---
>  gcc/doc/sourcebuild.texi  |  32 ++
>  .../c-c++-common/Wstringop-overflow-2-novec.c | 126 ++
>  .../c-c++-common/Wstringop-overflow-2.c   |  20 +-
>  gcc/testsuite/gcc.dg/Warray-bounds-48-novec.c | 364 ++
>  gcc/testsuite/gcc.dg/Warray-bounds-48.c   |   4 +-
>  gcc/testsuite/gcc.dg/Warray-bounds-51-novec.c |  21 +
>  gcc/testsuite/gcc.dg/Warray-bounds-51.c   |   2 +-
>  .../gcc.dg/Warray-parameter-3-novec.c |  16 +
>  gcc/testsuite/gcc.dg/Warray-parameter-3.c |   2 +-
>  .../gcc.dg/Wstringop-overflow-14-novec.c  |  16 +
>  gcc/testsuite/gcc.dg/Wstringop-overflow-14.c  |   4 +-
>  .../gcc.dg/Wstringop-overflow-21-novec.c  |  34 ++
>  gcc/testsuite/gcc.dg/Wstringop-overflow-21.c  |   8 +-
>  gcc/testsuite/gcc.dg/Wstringop-overflow-68.c  |  17 +-
>  .../gcc.dg/Wstringop-overflow-76-novec.c  |  88 +
>  gcc/testsuite/gcc.dg/Wstringop-overflow-76.c  |  18 +-
>  .../Wzero-length-array-bounds-2-novec.c   |  45 +++
>  .../gcc.dg/Wzero-length-array-bounds-2.c  |   2 +-
>  gcc/testsuite/lib/target-supports.exp | 182 +
>  19 files changed, 967 insertions(+), 34 deletions(-)
>  create mode 100644 gcc/testsuite/c-c++-common/Wstringop-overflow-2-novec.c
>  create mode 100644 gcc/testsuite/gcc.dg/Warray-bounds-48-novec.c
>  create mode 100644 gcc/testsuite/gcc.dg/Warray-bounds-51-novec.c
>  create mode 100644 gcc/testsuite/gcc.dg/Warray-parameter-3-novec.c
>  create mode 100644 gcc/testsuite/gcc.dg/Wstringop-overflow-14-novec.c
>  create mode 100644 gcc/testsuite/gcc.dg/Wstringop-overflow-21-novec.c
>  create mode 100644 gcc/testsuite/gcc.dg/Wstringop-overflow-76-novec.c
>  create mode 100644
> gcc/testsuite/gcc.dg/Wzero-length-array-bounds-2-novec.c
>
> diff --git a/gcc/doc/sourceb

[PATCH] tree-optimization/102853 - avoid trapping types in split_constant_offset

2021-10-20 Thread Richard Biener via Gcc-patches
This avoids running into the assert in compute_distributive_range when
starting the analysis with operations in a trapping type.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2021-10-20  Richard Biener  

PR tree-optimization/102853
* tree-data-ref.c (split_constant_offset_1): Bail out
immediately if the expression traps on overflow.
---
 gcc/tree-data-ref.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index 57bac06242f..46f4ffedb48 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -757,6 +757,9 @@ split_constant_offset_1 (tree type, tree op0, enum 
tree_code code, tree op1,
   *var = NULL_TREE;
   *off = NULL_TREE;
 
+  if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
+return false;
+
   switch (code)
 {
 case INTEGER_CST:
-- 
2.31.1


Re: [PATCH 1/N] Rename asm_out_file function arguments.

2021-10-20 Thread Martin Liška

On 9/16/21 12:00, Martin Liška wrote:

As preparation for a new global object that will encapsulate
asm_out_file, we would need to live with a macro that will
define asm_out_file as casm->out_file and thus the name
can't be used in function arguments.

I've built all cross compilers with the change and
can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin


May I please ping this renaming patch?

Thanks,
Martin



Re: [match.pd] PR83750 - CSE erf/erfc pair

2021-10-20 Thread Prathamesh Kulkarni via Gcc-patches
On Tue, 19 Oct 2021 at 16:55, Richard Biener  wrote:
>
> On Tue, 19 Oct 2021, Prathamesh Kulkarni wrote:
>
> > On Tue, 19 Oct 2021 at 13:02, Richard Biener  
> > wrote:
> > >
> > > On Tue, Oct 19, 2021 at 9:03 AM Prathamesh Kulkarni via Gcc-patches
> > >  wrote:
> > > >
> > > > On Mon, 18 Oct 2021 at 17:23, Richard Biener  wrote:
> > > > >
> > > > > On Mon, 18 Oct 2021, Prathamesh Kulkarni wrote:
> > > > >
> > > > > > On Mon, 18 Oct 2021 at 17:10, Richard Biener  
> > > > > > wrote:
> > > > > > >
> > > > > > > On Mon, 18 Oct 2021, Prathamesh Kulkarni wrote:
> > > > > > >
> > > > > > > > On Mon, 18 Oct 2021 at 16:18, Richard Biener 
> > > > > > > >  wrote:
> > > > > > > > >
> > > > > > > > > On Mon, 18 Oct 2021, Prathamesh Kulkarni wrote:
> > > > > > > > >
> > > > > > > > > > Hi Richard,
> > > > > > > > > > As suggested in PR, I have attached WIP patch that adds two 
> > > > > > > > > > patterns
> > > > > > > > > > to match.pd:
> > > > > > > > > > erfc(x) --> 1 - erf(x) if canonicalize_math_p() and,
> > > > > > > > > > 1 - erf(x) --> erfc(x) if !canonicalize_math_p().
> > > > > > > > > >
> > > > > > > > > > This works to remove call to erfc for the following test:
> > > > > > > > > > double f(double x)
> > > > > > > > > > {
> > > > > > > > > >   double g(double, double);
> > > > > > > > > >
> > > > > > > > > >   double t1 = __builtin_erf (x);
> > > > > > > > > >   double t2 = __builtin_erfc (x);
> > > > > > > > > >   return g(t1, t2);
> > > > > > > > > > }
> > > > > > > > > >
> > > > > > > > > > with .optimized dump shows:
> > > > > > > > > >   t1_2 = __builtin_erf (x_1(D));
> > > > > > > > > >   t2_3 = 1.0e+0 - t1_2;
> > > > > > > > > >
> > > > > > > > > > However, for the following test:
> > > > > > > > > > double f(double x)
> > > > > > > > > > {
> > > > > > > > > >   double g(double, double);
> > > > > > > > > >
> > > > > > > > > >   double t1 = __builtin_erfc (x);
> > > > > > > > > >   return t1;
> > > > > > > > > > }
> > > > > > > > > >
> > > > > > > > > > It canonicalizes erfc(x) to 1 - erf(x), but does not 
> > > > > > > > > > transform 1 -
> > > > > > > > > > erf(x) to erfc(x) again
> > > > > > > > > > post canonicalization.
> > > > > > > > > > -fdump-tree-folding shows that 1 - erf(x) --> erfc(x) gets 
> > > > > > > > > > applied,
> > > > > > > > > > but then it tries to
> > > > > > > > > > resimplify erfc(x), which fails post canonicalization. So 
> > > > > > > > > > we end up
> > > > > > > > > > with erfc(x) transformed to
> > > > > > > > > > 1 - erf(x) in .optimized dump, which I suppose isn't ideal.
> > > > > > > > > > Could you suggest how to proceed ?
> > > > > > > > >
> > > > > > > > > I applied your patch manually and it does the intended
> > > > > > > > > simplifications so I wonder what I am missing?
> > > > > > > > Would it be OK to always fold erfc(x) -> 1 - erf(x) even when 
> > > > > > > > there's
> > > > > > > > no erf(x) in the source ?
> > > > > > >
> > > > > > > I do think it's reasonable to expect erfc to be available when erf
> > > > > > > is and vice versa but note both are C99 specified functions 
> > > > > > > (either
> > > > > > > requires -lm).
> > > > > > OK, thanks. Would it be OK to commit the patch after bootstrap+test 
> > > > > > ?
> > > > >
> > > > > Yes, but I'm confused because you say the patch doesn't work for you?
> > > > The patch works for me to CSE erf/erfc pair.
> > > > However when there's only erfc in the source, it canonicalizes erfc(x)
> > > > to 1 - erf(x) but later fails to uncanonicalize 1 - erf(x) back to
> > > > erfc(x)
> > > > with -O3 -funsafe-math-optimizations.
> > > >
> > > > For,
> > > > t1 = __builtin_erfc(x),
> > > >
> > > > .optimized dump shows:
> > > >   _2 = __builtin_erf (x_1(D));
> > > >   t1_3 = 1.0e+0 - _2;
> > > >
> > > > and for,
> > > > double t1 = x + __builtin_erfc(x);
> > > >
> > > > .optimized dump shows:
> > > >   _3 = __builtin_erf (x_2(D));
> > > >   _7 = x_2(D) + 1.0e+0;
> > > >   t1_4 = _7 - _3;
> > > >
> > > > I assume in both cases, we want erfc in the code-gen instead ?
> > > > I think the reason uncanonicalization fails is because the pattern 1 -
> > > > erf(x) to erfc(x)
> > > > gets applied, but then it fails in resimplifying erfc(x), and we end
> > > > up with 1 - erf(x) in code-gen.
> > > >
> > > > From gimple-match.c, it hits the simplification:
> > > >
> > > > gimple_seq *lseq = seq;
> > > > if (__builtin_expect (!dbg_cnt
> > > > (match), 0)) goto next_after_fail1172;
> > > > if (__builtin_expect (dump_file &&
> > > > (dump_flags & TDF_FOLDING), 0)) fprintf (dump_file, "Applying pattern
> > > > %s:%d, %s:%d\n", "match.pd", 6162, __FILE__, __LINE__);
> > > > {
> > > >   res_op->set_op (CFN_BUILT_IN_ERFC, 
> > > > type, 1);
> > > >   res_op->ops[0] = captures[0];
> > > >   res_op->resimplify (l

Re: [PATCH 1/N] Rename asm_out_file function arguments.

2021-10-20 Thread Richard Biener via Gcc-patches
On Wed, Oct 20, 2021 at 1:58 PM Martin Liška  wrote:
>
> On 9/16/21 12:00, Martin Liška wrote:
> > As preparation for a new global object that will encapsulate
> > asm_out_file, we would need to live with a macro that will
> > define asm_out_file as casm->out_file and thus the name
> > can't be used in function arguments.
> >
> > I've built all cross compilers with the change and
> > can bootstrap on x86_64-linux-gnu and survives regression tests.
> >
> > Ready to be installed?
> > Thanks,
> > Martin
>
> May I please ping this renaming patch?

The patch is OK.

Thanks,
Richard.

> Thanks,
> Martin
>


[PATCH] doc: Fix documentation around 'asm' keyword in C++

2021-10-20 Thread Jonathan Wakely via Gcc-patches
The documentation on asm statements suggests asm is always a GNU
extension, but it's been part of ISO C++ since the first standard.

The documentation of -fno-asm is wrong for C++ as it states that it only
affects typeof, but actually it affects typeof and asm (despite asm
being part of ISO C++).

gcc/ChangeLog:

* doc/extend.texi (Basic Asm): Clarify that asm is not an
extension in C++.
* doc/invoke.texi (-fno-asm): Fix description for C++.

OK for trunk?

commit 0321c56153d385fda25ada73ca4e474358d7ec9c
Author: Jonathan Wakely 
Date:   Wed Oct 20 12:46:29 2021

doc: Fix documentation around 'asm' keyword in C++

The documentation on asm statements suggests asm is always a GNU
extension, but it's been part of ISO C++ since the first standard.

The documentation of -fno-asm is wrong for C++ as it states that it only
affects typeof, but actually it affects typeof and asm (despite asm
being part of ISO C++).

gcc/ChangeLog:

* doc/extend.texi (Basic Asm): Clarify that asm is not an
extension in C++.
* doc/invoke.texi (-fno-asm): Fix description for C++.

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 3c942d81c32..62280f6e00b 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -9728,10 +9728,12 @@ A basic @code{asm} statement has the following syntax:
 asm @var{asm-qualifiers} ( @var{AssemblerInstructions} )
 @end example
 
-The @code{asm} keyword is a GNU extension.
-When writing code that can be compiled with @option{-ansi} and the
-various @option{-std} options, use @code{__asm__} instead of 
-@code{asm} (@pxref{Alternate Keywords}).
+For the C language, the @code{asm} keyword is a GNU extension.
+When writing C code that can be compiled with @option{-ansi} and the
+@option{-std} options that select a base standard, use @code{__asm__}
+instead of @code{asm} (@pxref{Alternate Keywords}).  For the C++
+language, @code{asm} is a standard keyword, but @code{__asm__} can
+be used for code compiled with @option{-fno-asm}.
 
 @subsubheading Qualifiers
 @table @code
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index c93d822431f..6d1e328571a 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -2485,14 +2485,14 @@ supported for C as this construct is allowed by C++.
 Do not recognize @code{asm}, @code{inline} or @code{typeof} as a
 keyword, so that code can use these words as identifiers.  You can use
 the keywords @code{__asm__}, @code{__inline__} and @code{__typeof__}
-instead.  @option{-ansi} implies @option{-fno-asm}.
+instead.  In C, @option{-ansi} implies @option{-fno-asm}.
 
-In C++, this switch only affects the @code{typeof} keyword, since
-@code{asm} and @code{inline} are standard keywords.  You may want to
-use the @option{-fno-gnu-keywords} flag instead, which has the same
-effect.  In C99 mode (@option{-std=c99} or @option{-std=gnu99}), this
-switch only affects the @code{asm} and @code{typeof} keywords, since
-@code{inline} is a standard keyword in ISO C99.
+In C++, @code{inline} is a standard keyword and is not affected by
+this switch.  You may want to use the @option{-fno-gnu-keywords} flag
+instead, which disables @code{typeof} but not @code{asm} and
+@code{inline}.  In C99 mode (@option{-std=c99} or @option{-std=gnu99}),
+this switch only affects the @code{asm} and @code{typeof} keywords,
+since @code{inline} is a standard keyword in ISO C99.
 
 @item -fno-builtin
 @itemx -fno-builtin-@var{function}


Re: [PATCH] gcc-changelog: Add libffi/ to ignored_prefixes

2021-10-20 Thread Martin Liška

On 10/20/21 09:15, Martin Liška wrote:

On 10/20/21 01:23, H.J. Lu wrote:

Add libffi/ to ignored_prefixes for syncing with libffi upstream:


Sure, please push it.

Martin


Hello H.J.

Note the server hook is updated after you installed the patch.

Martin


[PATCH, v2, OpenMP 5.2, Fortran] Strictly-structured block support for OpenMP directives

2021-10-20 Thread Chung-Lin Tang

Hi Jakub,
this version adjusts the patch to let sections/parallel sections also use
strictly-structured blocks, making it more towards 5.2.

Because of this change, some of the testcases using the sections-construct need
a bit of adjustment too, since "block; end block" at the start of the construct
now means something different than before.

There are now three new testcases, with the non-dg-error/dg-error cases 
separated,
and a third testcase containing a few cases listed in prior emails. I hope this 
is
enough.

The implementation status entry in libgomp/libgomp.texi for strictly-structured 
blocks
has also been changed to "Y" in this patch.

Tested without regressions, is this now okay for trunk?

Thanks,
Chung-Lin

2021-10-20  Chung-Lin Tang  

gcc/fortran/ChangeLog:

* decl.c (gfc_match_end): Add COMP_OMP_STRICTLY_STRUCTURED_BLOCK case
together with COMP_BLOCK.
* parse.c (parse_omp_structured_block): Change return type to
'gfc_statement', add handling for strictly-structured block case, adjust
recursive calls to parse_omp_structured_block.
(parse_executable): Adjust calls to parse_omp_structured_block.
* parse.h (enum gfc_compile_state): Add
COMP_OMP_STRICTLY_STRUCTURED_BLOCK.
* trans-openmp.c (gfc_trans_omp_workshare): Add EXEC_BLOCK case
handling.

gcc/testsuite/ChangeLog:

* gfortran.dg/gomp/cancel-1.f90: Adjust testcase.
* gfortran.dg/gomp/nesting-3.f90: Adjust testcase.
* gfortran.dg/gomp/strictly-structured-block-1.f90: New test.
* gfortran.dg/gomp/strictly-structured-block-2.f90: New test.
* gfortran.dg/gomp/strictly-structured-block-3.f90: New test.

libgomp/ChangeLog:

* libgomp.texi (Support of strictly structured blocks in Fortran):
Adjust to 'Y'.
* testsuite/libgomp.fortran/task-reduction-16.f90: Adjust testcase.
diff --git a/gcc/fortran/decl.c b/gcc/fortran/decl.c
index d6a22d13451..66489da12be 100644
--- a/gcc/fortran/decl.c
+++ b/gcc/fortran/decl.c
@@ -8449,6 +8449,7 @@ gfc_match_end (gfc_statement *st)
   break;
 
 case COMP_BLOCK:
+case COMP_OMP_STRICTLY_STRUCTURED_BLOCK:
   *st = ST_END_BLOCK;
   target = " block";
   eos_ok = 0;
diff --git a/gcc/fortran/parse.c b/gcc/fortran/parse.c
index 7d765a0866d..2fb98844356 100644
--- a/gcc/fortran/parse.c
+++ b/gcc/fortran/parse.c
@@ -5451,7 +5451,7 @@ parse_oacc_loop (gfc_statement acc_st)
 
 /* Parse the statements of an OpenMP structured block.  */
 
-static void
+static gfc_statement
 parse_omp_structured_block (gfc_statement omp_st, bool workshare_stmts_only)
 {
   gfc_statement st, omp_end_st;
@@ -5538,6 +5538,32 @@ parse_omp_structured_block (gfc_statement omp_st, bool 
workshare_stmts_only)
   gcc_unreachable ();
 }
 
+  bool block_construct = false;
+  gfc_namespace *my_ns = NULL;
+  gfc_namespace *my_parent = NULL;
+
+  st = next_statement ();
+
+  if (st == ST_BLOCK)
+{
+  /* Adjust state to a strictly-structured block, now that we found that
+the body starts with a BLOCK construct.  */
+  s.state = COMP_OMP_STRICTLY_STRUCTURED_BLOCK;
+
+  block_construct = true;
+  gfc_notify_std (GFC_STD_F2008, "BLOCK construct at %C");
+
+  my_ns = gfc_build_block_ns (gfc_current_ns);
+  gfc_current_ns = my_ns;
+  my_parent = my_ns->parent;
+
+  new_st.op = EXEC_BLOCK;
+  new_st.ext.block.ns = my_ns;
+  new_st.ext.block.assoc = NULL;
+  accept_statement (ST_BLOCK);
+  st = parse_spec (ST_NONE);
+}
+
   do
 {
   if (workshare_stmts_only)
@@ -5554,7 +5580,6 @@ parse_omp_structured_block (gfc_statement omp_st, bool 
workshare_stmts_only)
 restrictions apply recursively.  */
  bool cycle = true;
 
- st = next_statement ();
  for (;;)
{
  switch (st)
@@ -5580,13 +5605,13 @@ parse_omp_structured_block (gfc_statement omp_st, bool 
workshare_stmts_only)
case ST_OMP_PARALLEL_MASKED:
case ST_OMP_PARALLEL_MASTER:
case ST_OMP_PARALLEL_SECTIONS:
- parse_omp_structured_block (st, false);
- break;
+ st = parse_omp_structured_block (st, false);
+ continue;
 
case ST_OMP_PARALLEL_WORKSHARE:
case ST_OMP_CRITICAL:
- parse_omp_structured_block (st, true);
- break;
+ st = parse_omp_structured_block (st, true);
+ continue;
 
case ST_OMP_PARALLEL_DO:
case ST_OMP_PARALLEL_DO_SIMD:
@@ -5609,7 +5634,7 @@ parse_omp_structured_block (gfc_statement omp_st, bool 
workshare_stmts_only)
}
}
   else
-   st = parse_executable (ST_NONE);
+   st = parse_executable (st);
   if (st == ST_NONE)
unexpected_eof ();
   else if (st == ST_OMP_SECTION
@@ -5619,9 +5644,27 @@ parse_omp_structured_block (

Re: [RFC] Remove VRP threader passes in exchange for better threading pre-VRP.

2021-10-20 Thread Andrew MacLeod via Gcc-patches

On 10/20/21 5:27 AM, Aldy Hernandez wrote:

On Wed, Oct 20, 2021 at 1:00 AM Jeff Law  wrote:



On 10/18/2021 8:03 AM, Aldy Hernandez wrote:


On 10/18/21 3:41 PM, Aldy Hernandez wrote:


I've been experimenting with reducing the total number of threading
passes, and I'd like to see if there's consensus/stomach for altering
the pipeline.  Note, that the goal is to remove forward threader
clients,
not the other way around.  So, we should prefer to remove a VRP threader
instance over a *.thread one immediately before VRP.

After some playing, it looks like if we enable fully-resolving mode in
the *.thread passes immediately preceeding VRP, we can remove the VRP
threading passes altogether, thus removing 2 threading passes (and
forward threading passes at that!).

It occurs to me that we could also remove the threading before VRP
passes, and enable a fully-resolving backward threader after VRP. I
haven't played with this scenario, but it should be just as good.
That being said, I don't know the intricacies of why we had both pre
and post VRP threading passes, and if one is ideally better than the
other.

The only post-VRP threading pass that (in my mind) makes sense is the
one sitting between VRP and DOM and it should replace the DOM based
threader.

Yes, that's the goal, but it won't happen on this release because of
floats.  The DOM threader uses the const/avails machinery to thread
conditionals involving floats, something the path solver can't do
because it depends on gori/ranger.  Adding floats to ranger is
probably our #1 task for the next cycle.

Now before Andrew gets clever, the relation oracle is technically type
agnostic, so it could theoretically be possible to use it in the DOM
threader and replace all the const/avails stuff.  But I'd like to go
on vacation at some point ;-).

Oh?  the float stuff isn't range related, just relations?  you can 
certainly register those and query/fold them




[PATCH] Attempt to resolve all incoming paths to a PHI.

2021-10-20 Thread Aldy Hernandez via Gcc-patches
The code that threads incoming paths to a PHI is duplicating what we
do generically in find_paths_to_names.  This shortcoming is actually
one of the reasons we aren't threading all possible paths into a PHI.
For example, we give up after finding one threadable path, but some
PHIs have multiple threadable paths:

  // x_5 = PHI <10(4), 20(5), ...>
  // if (x_5 > 5)

Addressing this not only fixes the oversight, but simplifies the
PHI handling code, since we can consider the PHI fully resolved upon
return.

Interestingly, for ssa-thread-12.c the main thread everything was
hinging on was unreachable.  With this patch, we call
maybe_register_path() earlier.  In doing so, the solver realizes
that any path starting with 4->8 is unreachable and can be avoided.
This caused the cascade of threadable paths that depended on this
to no longer happen.  Since threadable paths in thread[34] was the only
thing this test was testing, there's no longer anything to test.  Neat!

Tested on x86-64 Linux.

OK for trunk?

gcc/ChangeLog:

* tree-ssa-threadbackward.c (back_threader::resolve_phi):
Attempt to resolve all incoming paths to a PHI.
(back_threader::resolve_def): Always return true for PHIs.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/pr21090.c: Adjust for threading.
* gcc.dg/tree-ssa/ssa-thread-12.c: Removed.
---
 gcc/testsuite/gcc.dg/tree-ssa/pr21090.c   |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c | 73 ---
 gcc/tree-ssa-threadbackward.c | 70 +-
 3 files changed, 21 insertions(+), 124 deletions(-)
 delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr21090.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr21090.c
index 3909adb72d4..92a87688601 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr21090.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr21090.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-vrp1 
-fdelete-null-pointer-checks" } */
+/* { dg-options "-O2 -fno-thread-jumps -fdisable-tree-evrp -fdump-tree-vrp1 
-fdelete-null-pointer-checks" } */
 
 int g, h;
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c
deleted file mode 100644
index 08c0b8d3bcc..000
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c
+++ /dev/null
@@ -1,73 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-thread3-details -fdump-tree-thread4-details 
-fno-finite-loops --param early-inlining-insns=14 -fno-inline-functions" } */
-/* { dg-final { scan-tree-dump "Registering jump thread" "thread3" } } */
-/* { dg-final { scan-tree-dump "Registering jump thread" "thread4" } } */
-
-typedef struct bitmap_head_def *bitmap;
-typedef const struct bitmap_head_def *const_bitmap;
-typedef struct VEC_int_base
-{
-}
-VEC_int_base;
-typedef struct VEC_int_heap
-{
-  VEC_int_base base;
-}
-VEC_int_heap;
-typedef unsigned long BITMAP_WORD;
-typedef struct bitmap_element_def
-{
-  struct bitmap_element_def *next;
-  unsigned int indx;
-}
-bitmap_element;
-typedef struct bitmap_head_def
-{
-}
-bitmap_head;
-typedef struct
-{
-  bitmap_element *elt1;
-  bitmap_element *elt2;
-  BITMAP_WORD bits;
-}
-bitmap_iterator;
-static __inline__ void
-bmp_iter_and_compl_init (bitmap_iterator * bi, const_bitmap map1,
-const_bitmap map2, unsigned start_bit,
-unsigned *bit_no)
-{
-}
-
-static __inline__ void
-bmp_iter_next (bitmap_iterator * bi, unsigned *bit_no)
-{
-}
-
-static __inline__ unsigned char
-bmp_iter_and_compl (bitmap_iterator * bi, unsigned *bit_no)
-{
-  if (bi->bits)
-{
-  while (bi->elt2 && bi->elt2->indx < bi->elt1->indx)
-   bi->elt2 = bi->elt2->next;
-}
-}
-
-extern int VEC_int_base_length (VEC_int_base *);
-bitmap
-compute_idf (bitmap def_blocks, bitmap_head * dfs)
-{
-  bitmap_iterator bi;
-  unsigned bb_index, i;
-  VEC_int_heap *work_stack;
-  bitmap phi_insertion_points;
-  while ((VEC_int_base_length (((work_stack) ? &(work_stack)->base : 0))) > 0)
-{
-  for (bmp_iter_and_compl_init
-  (&(bi), (&dfs[bb_index]), (phi_insertion_points), (0), &(i));
-  bmp_iter_and_compl (&(bi), &(i)); bmp_iter_next (&(bi), &(i)))
-   {
-   }
-}
-}
diff --git a/gcc/tree-ssa-threadbackward.c b/gcc/tree-ssa-threadbackward.c
index edb396b3d6f..a6b9893abbd 100644
--- a/gcc/tree-ssa-threadbackward.c
+++ b/gcc/tree-ssa-threadbackward.c
@@ -83,7 +83,7 @@ private:
   edge maybe_register_path ();
   bool find_paths_to_names (basic_block bb, bitmap imports);
   bool resolve_def (tree name, bitmap interesting, vec &worklist);
-  bool resolve_phi (gphi *phi, bitmap imports);
+  void resolve_phi (gphi *phi, bitmap imports);
   edge find_taken_edge (const vec &path);
   edge find_taken_edge_cond (const vec &path, gcond *);
   edge find_taken_edge_switch (const vec &path, gswitch *);
@@ -243,17 +243,14 @@ populate_wo

Re: [PATCH] C, C++, OpenMP: Add 'has_device_addr' clause to 'target' construct

2021-10-20 Thread Jakub Jelinek via Gcc-patches
On Mon, Oct 18, 2021 at 06:17:20PM +0200, Marcel Vollweiler wrote:
> @@ -14255,6 +14257,16 @@ c_parser_omp_clause_use_device_addr (c_parser 
> *parser, tree list)
>  list);
>  }
>  
> +/* OpenMP 5.1:
> +   has_device_addr ( variable-list ) */
> +
> +static tree
> +c_parser_omp_clause_has_device_addr (c_parser *parser, tree list)
> +{
> +  return c_parser_omp_var_list_parens (parser, OMP_CLAUSE_HAS_DEVICE_ADDR,
> +list);
> +}
> +
>  /* OpenMP 4.5:
> is_device_ptr ( variable-list ) */
>  
> @@ -16945,6 +16957,10 @@ c_parser_omp_all_clauses (c_parser *parser, 
> omp_clause_mask mask,
> clauses = c_parser_omp_clause_use_device_addr (parser, clauses);
> c_name = "use_device_addr";
> break;
> + case PRAGMA_OMP_CLAUSE_HAS_DEVICE_ADDR:
> +   clauses = c_parser_omp_clause_has_device_addr (parser, clauses);
> +   c_name = "has_device_addr";
> +   break;
>   case PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR:
> clauses = c_parser_omp_clause_is_device_ptr (parser, clauses);
> c_name = "is_device_ptr";
> @@ -20926,7 +20942,8 @@ c_parser_omp_target_exit_data (location_t loc, 
> c_parser *parser,
>   | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ALLOCATE) \
>   | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEFAULTMAP)   \
>   | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IN_REDUCTION) \
> - | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR))
> + | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR)\
> + | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_HAS_DEVICE_ADDR))
>  
>  static bool
>  c_parser_omp_target (c_parser *parser, enum pragma_context context, bool 
> *if_p)

OpenMP 5.1 in [200:6-9] says:
The has_device_addr clause indicates ... The list items may include array 
sections.

This means in addition to the c-parser.c and parser.c changes you've done,
at least c_parser_omp_variable_list needs to change to include
OMP_CLAUSE_HAS_DEVICE_ADDR among
case OMP_CLAUSE_AFFINITY:
case OMP_CLAUSE_DEPEND:
case OMP_CLAUSE_REDUCTION:
case OMP_CLAUSE_IN_REDUCTION:
case OMP_CLAUSE_TASK_REDUCTION:
clauses (similarly for C++) and then {,c_}finish_omp_clauses needs to handle
it similarly to other clauses that can have array sections.
As it is a data sharing clause, I think the closest model (e.g. for
handle_omp_array_sections* purposes) is OMP_CLAUSE_*REDUCTION.
Then even the case when OMP_CLAUSE_DECL of the clause needs handling
similarly to other clauses that accept array sections.

> diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
> index 0aac978..d677592 100644
> --- a/gcc/c/c-typeck.c
> +++ b/gcc/c/c-typeck.c
> @@ -14054,7 +14054,7 @@ c_finish_omp_clauses (tree clauses, enum 
> c_omp_region_type ort)
>  {
>bitmap_head generic_head, firstprivate_head, lastprivate_head;
>bitmap_head aligned_head, map_head, map_field_head, map_firstprivate_head;
> -  bitmap_head oacc_reduction_head;
> +  bitmap_head oacc_reduction_head, has_device_addr_head, is_device_ptr_head;

I'd prefer not to add new bitmaps unless necessary, can't the clause use the
same bitmap together with is_device_ptr clause?  One can't specify something
both as is_device_ptr and has_device_addr at the same time...

> --- a/gcc/cp/parser.c
> +++ b/gcc/cp/parser.c
> @@ -36145,7 +36145,9 @@ cp_parser_omp_clause_name (cp_parser *parser)
>   result = PRAGMA_OMP_CLAUSE_GRAINSIZE;
> break;
>   case 'h':
> -   if (!strcmp ("hint", p))
> +   if (!strcmp ("has_device_addr", p))
> + result = PRAGMA_OMP_CLAUSE_HAS_DEVICE_ADDR;
> +   else if (!strcmp ("hint", p))
>   result = PRAGMA_OMP_CLAUSE_HINT;
> else if (!strcmp ("host", p))
>   result = PRAGMA_OACC_CLAUSE_HOST;
> @@ -39830,6 +39832,11 @@ cp_parser_omp_all_clauses (cp_parser *parser, 
> omp_clause_mask mask,
>   clauses);
> c_name = "is_device_ptr";
> break;
> + case PRAGMA_OMP_CLAUSE_HAS_DEVICE_ADDR:
> +   clauses = cp_parser_omp_var_list (parser, OMP_CLAUSE_HAS_DEVICE_ADDR,
> + clauses);
> +   c_name = "has_device_addr";
> +   break;
>   case PRAGMA_OMP_CLAUSE_IF:
> clauses = cp_parser_omp_clause_if (parser, clauses, token->location,
>true);
> @@ -44005,7 +44012,8 @@ cp_parser_omp_target_update (cp_parser *parser, 
> cp_token *pragma_tok,
>   | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_DEFAULTMAP)   \
>   | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_ALLOCATE) \
>   | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IN_REDUCTION) \
> - | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR))
> + | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_IS_DEVICE_PTR)\
> + | (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_HAS_DEVICE_ADDR))
>  
>  static bool
>  cp_parser_omp_target (cp_parser *parser,

[committed][PATCH] libffi: Add LOCAL_PATCHES

2021-10-20 Thread H.J. Lu via Gcc-patches
* LOCAL_PATCHES: New file.
---
 libffi/LOCAL_PATCHES | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 libffi/LOCAL_PATCHES

diff --git a/libffi/LOCAL_PATCHES b/libffi/LOCAL_PATCHES
new file mode 100644
index 000..a377c28ce8d
--- /dev/null
+++ b/libffi/LOCAL_PATCHES
@@ -0,0 +1,2 @@
+5be7b66998127286fada45e4f23bd8a2056d553e
+4824ed41ba7cd63e60fd9f8769a58b79935a90d1
-- 
2.32.0



[PATCH] libffi: Add --enable-cet to configure

2021-10-20 Thread H.J. Lu via Gcc-patches
When --enable-cet is used to configure GCC, enable Intel CET in libffi.

* Makefile.am (AM_CFLAGS): Add $(CET_FLAGS).
(AM_CCASFLAGS): Likewise.
* configure.ac (CET_FLAGS): Add GCC_CET_FLAGS and AC_SUBST.
* Makefile.in: Regenerate.
* aclocal.m4: Likewise.
* configure: Likewise.
* fficonfig.h.in: Likewise.
* include/Makefile.in: Likewise.
* man/Makefile.in: Likewise.
* testsuite/Makefile.in: Likewise.
---
 libffi/Makefile.am   |  4 +-
 libffi/Makefile.in   |  7 ++-
 libffi/aclocal.m4|  2 +
 libffi/configure | 97 ++--
 libffi/configure.ac  |  4 ++
 libffi/include/Makefile.in   |  3 ++
 libffi/man/Makefile.in   |  3 ++
 libffi/testsuite/Makefile.in |  3 ++
 8 files changed, 116 insertions(+), 7 deletions(-)

diff --git a/libffi/Makefile.am b/libffi/Makefile.am
index 02e36176c67..c6d6f849c53 100644
--- a/libffi/Makefile.am
+++ b/libffi/Makefile.am
@@ -182,7 +182,7 @@ nodist_libffi_convenience_la_SOURCES = 
$(nodist_libffi_la_SOURCES)
 
 LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS))
 
-AM_CFLAGS = -Wall -g -fexceptions
+AM_CFLAGS = -Wall -g -fexceptions $(CET_FLAGS)
 if FFI_DEBUG
 # Build debug. Define FFI_DEBUG on the commandline so that, when building with
 # MSVC, it can link against the debug CRT.
@@ -218,7 +218,7 @@ libffi_la_LDFLAGS = -no-undefined $(libffi_version_info) 
$(libffi_version_script
 libffi_la_DEPENDENCIES = $(libffi_la_LIBADD) $(libffi_version_dep)
 
 AM_CPPFLAGS = -I. -I$(top_srcdir)/include -Iinclude -I$(top_srcdir)/src
-AM_CCASFLAGS = $(AM_CPPFLAGS)
+AM_CCASFLAGS = $(AM_CPPFLAGS) $(CET_FLAGS)
 
 # Multilib support.  Automake should provide these on its own.
 all-recursive: all-multi
diff --git a/libffi/Makefile.in b/libffi/Makefile.in
index 6ff0c67a779..5524a6a571e 100644
--- a/libffi/Makefile.in
+++ b/libffi/Makefile.in
@@ -99,7 +99,9 @@ subdir = .
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
$(top_srcdir)/../config/asmcfi.m4 \
+   $(top_srcdir)/../config/cet.m4 \
$(top_srcdir)/../config/depstand.m4 \
+   $(top_srcdir)/../config/enable.m4 \
$(top_srcdir)/../config/lead-dot.m4 \
$(top_srcdir)/../config/multi.m4 \
$(top_srcdir)/../config/override.m4 \
@@ -320,6 +322,7 @@ CCAS = @CCAS@
 CCASDEPMODE = @CCASDEPMODE@
 CCASFLAGS = @CCASFLAGS@
 CCDEPMODE = @CCDEPMODE@
+CET_FLAGS = @CET_FLAGS@
 CFLAGS = @CFLAGS@
 CPP = @CPP@
 CPPFLAGS = @CPPFLAGS@
@@ -586,7 +589,7 @@ libffi_convenience_la_LIBADD = $(libffi_la_LIBADD)
 libffi_convenience_la_DEPENDENCIES = $(libffi_la_DEPENDENCIES)
 nodist_libffi_convenience_la_SOURCES = $(nodist_libffi_la_SOURCES)
 LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS))
-AM_CFLAGS = -Wall -g -fexceptions $(am__append_2)
+AM_CFLAGS = -Wall -g -fexceptions $(CET_FLAGS) $(am__append_2)
 @LIBFFI_BUILD_VERSIONED_SHLIB_FALSE@libffi_version_script = 
 
@LIBFFI_BUILD_VERSIONED_SHLIB_GNU_TRUE@@LIBFFI_BUILD_VERSIONED_SHLIB_TRUE@libffi_version_script
 = -Wl,--version-script,libffi.map
 
@LIBFFI_BUILD_VERSIONED_SHLIB_SUN_TRUE@@LIBFFI_BUILD_VERSIONED_SHLIB_TRUE@libffi_version_script
 = -Wl,-M,libffi.map-sun
@@ -597,7 +600,7 @@ libffi_version_info = -version-info `grep -v '^\#' 
$(srcdir)/libtool-version`
 libffi_la_LDFLAGS = -no-undefined $(libffi_version_info) 
$(libffi_version_script) $(LTLDFLAGS) $(AM_LTLDFLAGS)
 libffi_la_DEPENDENCIES = $(libffi_la_LIBADD) $(libffi_version_dep)
 AM_CPPFLAGS = -I. -I$(top_srcdir)/include -Iinclude -I$(top_srcdir)/src
-AM_CCASFLAGS = $(AM_CPPFLAGS)
+AM_CCASFLAGS = $(AM_CPPFLAGS) $(CET_FLAGS)
 MULTISRCTOP = 
 MULTIBUILDTOP = 
 MULTIDIRS = 
diff --git a/libffi/aclocal.m4 b/libffi/aclocal.m4
index 9c8c88f8ca6..736ec308d5b 100644
--- a/libffi/aclocal.m4
+++ b/libffi/aclocal.m4
@@ -1189,7 +1189,9 @@ AC_SUBST([am__untar])
 
 m4_include([../config/acx.m4])
 m4_include([../config/asmcfi.m4])
+m4_include([../config/cet.m4])
 m4_include([../config/depstand.m4])
+m4_include([../config/enable.m4])
 m4_include([../config/lead-dot.m4])
 m4_include([../config/multi.m4])
 m4_include([../config/override.m4])
diff --git a/libffi/configure b/libffi/configure
index 4bababb87f5..575641cca1d 100755
--- a/libffi/configure
+++ b/libffi/configure
@@ -692,6 +692,7 @@ am__fastdepCCAS_TRUE
 CCASDEPMODE
 CCASFLAGS
 CCAS
+CET_FLAGS
 am__fastdepCXX_FALSE
 am__fastdepCXX_TRUE
 CXXDEPMODE
@@ -802,6 +803,7 @@ enable_multilib
 enable_silent_rules
 enable_generated_files_in_srcdir
 enable_dependency_tracking
+enable_cet
 enable_shared
 enable_static
 with_pic
@@ -1457,6 +1459,7 @@ Optional Features:
   do not reject slow dependency extractors
   --disable-dependency-tracking
   speeds up one-time build
+  --enable-cet            enable Intel CET in target libraries [default=auto]
   --enable-shared[=PKGS]  build shared libraries

Re: [match.pd] PR83750 - CSE erf/erfc pair

2021-10-20 Thread Richard Biener via Gcc-patches
On Wed, 20 Oct 2021, Prathamesh Kulkarni wrote:

> On Tue, 19 Oct 2021 at 16:55, Richard Biener  wrote:
> >
> > On Tue, 19 Oct 2021, Prathamesh Kulkarni wrote:
> >
> > > On Tue, 19 Oct 2021 at 13:02, Richard Biener  
> > > wrote:
> > > >
> > > > On Tue, Oct 19, 2021 at 9:03 AM Prathamesh Kulkarni via Gcc-patches
> > > >  wrote:
> > > > >
> > > > > On Mon, 18 Oct 2021 at 17:23, Richard Biener  
> > > > > wrote:
> > > > > >
> > > > > > On Mon, 18 Oct 2021, Prathamesh Kulkarni wrote:
> > > > > >
> > > > > > > On Mon, 18 Oct 2021 at 17:10, Richard Biener  
> > > > > > > wrote:
> > > > > > > >
> > > > > > > > On Mon, 18 Oct 2021, Prathamesh Kulkarni wrote:
> > > > > > > >
> > > > > > > > > On Mon, 18 Oct 2021 at 16:18, Richard Biener 
> > > > > > > > >  wrote:
> > > > > > > > > >
> > > > > > > > > > On Mon, 18 Oct 2021, Prathamesh Kulkarni wrote:
> > > > > > > > > >
> > > > > > > > > > > Hi Richard,
> > > > > > > > > > > As suggested in PR, I have attached WIP patch that adds 
> > > > > > > > > > > two patterns
> > > > > > > > > > > to match.pd:
> > > > > > > > > > > erfc(x) --> 1 - erf(x) if canonicalize_math_p() and,
> > > > > > > > > > > 1 - erf(x) --> erfc(x) if !canonicalize_math_p().
> > > > > > > > > > >
> > > > > > > > > > > This works to remove call to erfc for the following test:
> > > > > > > > > > > double f(double x)
> > > > > > > > > > > {
> > > > > > > > > > >   double g(double, double);
> > > > > > > > > > >
> > > > > > > > > > >   double t1 = __builtin_erf (x);
> > > > > > > > > > >   double t2 = __builtin_erfc (x);
> > > > > > > > > > >   return g(t1, t2);
> > > > > > > > > > > }
> > > > > > > > > > >
> > > > > > > > > > > with .optimized dump shows:
> > > > > > > > > > >   t1_2 = __builtin_erf (x_1(D));
> > > > > > > > > > >   t2_3 = 1.0e+0 - t1_2;
> > > > > > > > > > >
> > > > > > > > > > > However, for the following test:
> > > > > > > > > > > double f(double x)
> > > > > > > > > > > {
> > > > > > > > > > >   double g(double, double);
> > > > > > > > > > >
> > > > > > > > > > >   double t1 = __builtin_erfc (x);
> > > > > > > > > > >   return t1;
> > > > > > > > > > > }
> > > > > > > > > > >
> > > > > > > > > > > It canonicalizes erfc(x) to 1 - erf(x), but does not 
> > > > > > > > > > > transform 1 -
> > > > > > > > > > > erf(x) to erfc(x) again
> > > > > > > > > > > post canonicalization.
> > > > > > > > > > > -fdump-tree-folding shows that 1 - erf(x) --> erfc(x) 
> > > > > > > > > > > gets applied,
> > > > > > > > > > > but then it tries to
> > > > > > > > > > > resimplify erfc(x), which fails post canonicalization. So 
> > > > > > > > > > > we end up
> > > > > > > > > > > with erfc(x) transformed to
> > > > > > > > > > > 1 - erf(x) in .optimized dump, which I suppose isn't 
> > > > > > > > > > > ideal.
> > > > > > > > > > > Could you suggest how to proceed ?
> > > > > > > > > >
> > > > > > > > > > I applied your patch manually and it does the intended
> > > > > > > > > > simplifications so I wonder what I am missing?
> > > > > > > > > Would it be OK to always fold erfc(x) -> 1 - erf(x) even when 
> > > > > > > > > there's
> > > > > > > > > no erf(x) in the source ?
> > > > > > > >
> > > > > > > > I do think it's reasonable to expect erfc to be available when 
> > > > > > > > erf
> > > > > > > > is and vice versa but note both are C99 specified functions 
> > > > > > > > (either
> > > > > > > > requires -lm).
> > > > > > > OK, thanks. Would it be OK to commit the patch after 
> > > > > > > bootstrap+test ?
> > > > > >
> > > > > > Yes, but I'm confused because you say the patch doesn't work for 
> > > > > > you?
> > > > > The patch works for me to CSE erf/erfc pair.
> > > > > However when there's only erfc in the source, it canonicalizes erfc(x)
> > > > > to 1 - erf(x) but later fails to uncanonicalize 1 - erf(x) back to
> > > > > erfc(x)
> > > > > with -O3 -funsafe-math-optimizations.
> > > > >
> > > > > For,
> > > > > t1 = __builtin_erfc(x),
> > > > >
> > > > > .optimized dump shows:
> > > > >   _2 = __builtin_erf (x_1(D));
> > > > >   t1_3 = 1.0e+0 - _2;
> > > > >
> > > > > and for,
> > > > > double t1 = x + __builtin_erfc(x);
> > > > >
> > > > > .optimized dump shows:
> > > > >   _3 = __builtin_erf (x_2(D));
> > > > >   _7 = x_2(D) + 1.0e+0;
> > > > >   t1_4 = _7 - _3;
> > > > >
> > > > > I assume in both cases, we want erfc in the code-gen instead ?
> > > > > I think the reason uncaonicalization fails is because the pattern 1 -
> > > > > erf(x) to erfc(x)
> > > > > gets applied, but then it fails in resimplifying erfc(x), and we end
> > > > > up with 1 - erf(x) in code-gen.
> > > > >
> > > > > From gimple-match.c, it hits the simplification:
> > > > >
> > > > > gimple_seq *lseq = seq;
> > > > > if (__builtin_expect (!dbg_cnt
> > > > > (match), 0)) goto next_after_fail1172;
> > > > > if (__builtin_expect (dump_file &&
> > > > > (dump_flags & TDF_FOLDING), 0)) fprintf (dump_file, "Applying pa

Re: [PATCH] gcc-changelog: Add libffi/ to ignored_prefixes

2021-10-20 Thread H.J. Lu via Gcc-patches
On Wed, Oct 20, 2021 at 5:30 AM Martin Liška  wrote:
>
> On 10/20/21 09:15, Martin Liška wrote:
> > On 10/20/21 01:23, H.J. Lu wrote:
> >> Add libffi/ to ignored_prefixes for syncing with libffi upstream:
> >
> > Sure, please push it.
> >
> > Martin
>
> Hello H.J.
>
> Note the server hook is updated after you installed the patch.
>

Yes.  I pushed my libffi sync patch set and this commit

commit 90454a900824d96e6d4eae557a809c9d986198d9
Author: H.J. Lu 
Date:   Wed Oct 20 05:46:15 2021 -0700

libffi: Update ChangeLog.libffi for libffi 3.4.2

* ChangeLog.libffi: Copied from ChangeLog.old in libffi 3.4.2.

to sync libffi with libffi 3.4.2.

Thanks.

-- 
H.J.


[PATCH v2] libstdc++: Add support for POWER9 DARN instruction to std::random_device

2021-10-20 Thread Jonathan Wakely via Gcc-patches

On 20/10/21 10:12 +0100, Jonathan Wakely wrote:

On 19/10/21 17:47 +0100, Jonathan Wakely wrote:

The ISA-3.0 instruction set includes DARN ("deliver a random number")
which can be used similar to the existing support for RDRAND and RDSEED.

libstdc++-v3/ChangeLog:

* src/c++11/random.cc (USE_DARN): Define.
(__ppc_darn): New function to use POWER9 DARN instruction.
(Which): Add 'darn' enumerator.
(which_source): Check for __ppc_darn.
(random_device::_M_init): Support "darn" and "hw" tokens.
(random_device::_M_getentropy): Add darn to switch.
* testsuite/26_numerics/random/random_device/cons/token.cc:
Check "darn" token.
* testsuite/26_numerics/random/random_device/entropy.cc:
Likewise.

Tested powerpc64le-linux (power8 and power9) and x86_64-linux.

The new "darn" (power-specific) and "hw" (x86 and power)
strings should be documented, but I'll do that if this gets committed.

Most of this patch is just "more of the same", similar to the existing
code for RDRAND and RDSEED on x86, but the parts of the patch I'd like
more eyes on are:


+#elif defined __powerpc__ && defined __BUILTIN_CPU_SUPPORTS__
+# define USE_DARN 1
#endif


This means DARN can only be used when __builtin_cpu_supports is
available, which means glibc 2.23 ... is that acceptable? It means
RHEL 7 wouldn't be able to use DARN, but RHEL 8 would.

There certainly are POWER9 machines running RHEL 7 and similar
vintages (the GCC compile farm has one) so if there's another way to
check for ISA 3.0 then I could use that.

If __POWER9_VECTOR__ is defined when building libstdc++, presumably
that means the whole library can only be run on POWER9 hardware. So
would that mean we don't need to check __builtin_cpu_supports("darn")
when __POWER9_VECTOR__ is defined? Or is it possible to build with
-mcpu=power8 -mpower9-vector and run it on h/w without the DARN
instruction?

Also, I forgot to add a configure check that the assembler supports
darn, which is another prerequisite for using it here.


@@ -135,6 +137,15 @@ namespace std _GLIBCXX_VISIBILITY(default)
#endif
#endif

+#ifdef USE_DARN
+unsigned int
+__attribute__((target("power9")))


Oops, that should be "cpu=power9".

With that change it works on a POWER9 machine (9009-42A) with glibc
2.34 and binutils 2.35.



Here's the updated patch with a configure check for assembler support,
and the target attribute fixed.

This still requires Glibc 2.23 for __builtin_cpu_supports, which I'm
assuming is acceptable.


commit a6f925407dd05c593b230da1627435adc53584f8
Author: Jonathan Wakely 
Date:   Wed Oct 20 09:25:24 2021

libstdc++: Add support for POWER9 DARN instruction to std::random_device

The ISA-3.0 instruction set includes DARN ("deliver a random number")
which can be used similar to the existing support for RDRAND and RDSEED.

libstdc++-v3/ChangeLog:

* acinclude.m4 (GLIBCXX_CHECK_PPC_DARN): Check assembler.
* config.h.in: Regenerate.
* configure: Regenerate.
* configure.ac: Use GLIBCXX_CHECK_PPC_DARN.
* src/c++11/random.cc [_GLIBCXX_PPC_DARN] (USE_DARN): Define.
(__ppc_darn): New function to use POWER9 DARN instruction.
(Which): Add 'darn' enumerator.
(which_source): Check for __ppc_darn.
(random_device::_M_init): Support "darn" and "hw" tokens.
(random_device::_M_getentropy): Add darn to switch.
* testsuite/26_numerics/random/random_device/cons/token.cc:
Check "darn" token.
* testsuite/26_numerics/random/random_device/entropy.cc:
Likewise.

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index 90ecc4a87a2..9ff9ceb20ac 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -4100,6 +4100,27 @@ AC_DEFUN([GLIBCXX_CHECK_X86_RDSEED], [
   AC_MSG_RESULT($ac_cv_x86_rdseed)
 ])
 
+dnl
+dnl Check whether darn is supported in the assembler.
+AC_DEFUN([GLIBCXX_CHECK_PPC_DARN], [
+  AC_MSG_CHECKING([for darn support in assembler])
+  AC_CACHE_VAL(ac_cv_ppc_darn, [
+  ac_cv_ppc_darn=no
+  case "$target" in
+powerpc*-*-*)
+AC_TRY_COMPILE(, [
+  signed int x;
+  __asm__ __volatile__ (".machine power9; darn %0,0;": "=r" (x));
+], [ac_cv_ppc_darn=yes], [ac_cv_ppc_darn=no])
+  esac
+  ])
+  if test $ac_cv_ppc_darn = yes; then
+AC_DEFINE(_GLIBCXX_PPC_DARN, 1,
+		[ Defined if as can handle darn. ])
+  fi
+  AC_MSG_RESULT($ac_cv_ppc_darn)
+])
+
 dnl
 dnl Check whether get_nprocs is available in , and define _GLIBCXX_USE_GET_NPROCS.
 dnl
diff --git a/libstdc++-v3/configure.ac b/libstdc++-v3/configure.ac
index 2d68b3672b9..1189c68c380 100644
--- a/libstdc++-v3/configure.ac
+++ b/libstdc++-v3/configure.ac
@@ -467,6 +467,8 @@ GCC_CHECK_ASSEMBLER_HWCAP
 GLIBCXX_CHECK_X86_RDRAND
 # Check if assembler supports rdseed opcode.
 GLIBCXX_CHECK_X86_RDSEED
+# Check if assembler supports darn 

Re: [RFC] Remove VRP threader passes in exchange for better threading pre-VRP.

2021-10-20 Thread Aldy Hernandez via Gcc-patches
On Wed, Oct 20, 2021 at 2:32 PM Andrew MacLeod  wrote:
>
> On 10/20/21 5:27 AM, Aldy Hernandez wrote:

> Oh?  the float stuff isn't range related, just relations?  you can
> certainly register those and query/fold them

See?  I knew you'd get a bright idea.  No.  There's no time in this release :-).

And yes, the DOM threader's folding of float conditionals is limited
to relationals.  But why spend any time there (forward threader) when
next year we'll have floats and can replace the entire DOM threader
with a backward threader.

Aldy



Re: [PATCH] X86: Add an option -muse-unaligned-vector-move

2021-10-20 Thread H.J. Lu via Gcc-patches
On Wed, Oct 20, 2021 at 4:18 AM Richard Biener
 wrote:
>
> On Wed, Oct 20, 2021 at 12:40 PM Xu Dianhong  wrote:
> >
> > Many thanks for your explanation. I got the meaning of operands.
> > The "addpd b(%rip), %xmm0" instruction needs "b(%rip)" aligned, otherwise it 
> > will raise a "Real-Address Mode Exception".
> > I haven't considered this situation where "b(%rip)" has an address dependence 
> > on "a(%rip)" before. I think this situation could be resolved on the assembler 
> > side except for this dummy code like "movapd 0x200b37(%rip),%xmm1, ... 
> > addpd  0x200b37(%rip),%xmm0 ".
>
> Of course the compiler will only emit instructions which have the
> constraint of aligned memory
> when the memory is known to be aligned.  That's why I wonder why you
> would need such
> option.  "Real-Address Mode Exceptions" may point to the issue, but I
> wonder what's different
> in real mode vs. protected mode - even with segmentation the alignment
> of objects should
> prevail unless you play linker"tricks" that make global objects have
> different alignment - but
> then it's better to adjust the respective hooks to not falsely claim
> such alignment.  Consider
> for example
>
>if ((uintptr_t)&a & 0x7)
>  foo();
>   else
>  bar();
>
> GCC will optimize the branch statically to always call foo if 'a'
> appears to be aligned,
> even if you later try to "override" this with an option.  Alignment is
> not only about
> moves, it's also about knowledge about low bits in addresses and about
> alias analysis where alignment constrains how two objects can overlap.
>
> So - do not lie to the compiler!  A late "workaround" avoiding aligned
> SSE moves isn't a proper fix.
>

The motivations are

1.  AVX non-load/store ops work on unaligned memory.   Unaligned
load/store on aligned memory is as fast as aligned load/store on Intel
AVX machines.   The new switch makes load/store consistent with
other AVX ops.
2. We don't properly align the stack for AVX on Windows.  This can
be used as a workaround for -mavx on Windows.

We can change TARGET_USE_UNALIGNED_VECTOR_MOVE
to require AVX.

-- 
H.J.


[PATCH][RFC] Map -ftrapv to -fsanitize=signed-integer-overflow -fsanitize-undefined-trap-on-error

2021-10-20 Thread Richard Biener via Gcc-patches
This maps -ftrapv to -fsanitize=signed-integer-overflow
-fsanitize-undefined-trap-on-error, effectively removing
flag_trapv (or rather making it always false).

This has implications on language support - while -ftrapv
was formerly universally available the mapping restricts it
to the C family of frontends.

It also raises questions on mixing -ftrapv with -fsanitize
flags, specifically with other recovery options for the
undefined sanitizer since -fsanitize-undefined-trap-on-error
cannot be restricted to the signed-integer-overflow part at
the moment.  To more closely map behavior we could add
-fsanitize=trapv where with a single option we could also
simply alias -ftrapv to that.

Code quality wise a simple signed add compiles to

movl%edi, %eax
addl%esi, %eax
jo  .L5
...
.L5:
ud2

compared to

call__addvsi3

and it has less of the bugs -ftrapv has.  The IL will
not contain a PLUS_EXPR but a .UBSAN_CHECK_ADD internal
function call which has rudimentary support throughout
optimizers but is not recognized as possibly terminating
the program so

int foo (int i, int j, int *p, int k)
{
  int tem = i + j;
  *p = 0;
  if (k)
return tem;
  return 0;
}

will be optimized to perform the add only conditional
and the possibly NULL *p dereference first (note the
same happens with the "legacy" -ftrapv).  The behavior
with -fnon-call-exceptions is also different as the
internal functions are marked as not throwing and
as seen above the actual kind of trap can change (SIGILL
vs. SIGABRT).

One question is whether -ftrapv makes signed integer overflow
well-defined (to trap) like -fwrapv makes it wrap.  If so
then the above behavior is ill-formed.  Not sure how
sanitizers position themselves with respect to this and
whether the current behavior is OK there.  The patch below
instruments signed integer ops but leaves them undefined
so the compiler still has to be careful as to not introduce
new signed overflow (but at least that won't trap).
Currently -fwrapv -fsanitize=signed-integer-overflow will
not instrument any signed operations for example.

I do consider the option to simply make -ftrapv do nothing
but warn that people should use UBSAN - that wouldn't
imply semantics are 1:1 the same (which they are not).

Bootstrapped and tested on x86_64-unknown-linux-gnu, regresses

FAIL: gcc.dg/vect/trapv-vect-reduc-4.c scan-tree-dump-times vect "Detected 
reduc
tion." 3
FAIL: gcc.dg/vect/trapv-vect-reduc-4.c scan-tree-dump-times vect "using an 
in-or
der (fold-left) reduction" 1
FAIL: gcc.dg/vect/trapv-vect-reduc-4.c scan-tree-dump-times vect 
"vectorized 3 l
oops" 1

where the vectorizer doesn't know the UBSAN IFNs.

2021-10-20  Richard Biener  

* opts.c (common_handle_option): Handle -ftrapv like
-fsanitize=signed-integer-overflow
-fsanitize-undefined-trap-on-error and do not set
flag_trapv.
---
 gcc/opts.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/gcc/opts.c b/gcc/opts.c
index 65fe192a198..909d2a031ff 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -3022,7 +3022,21 @@ common_handle_option (struct gcc_options *opts,
 
 case OPT_ftrapv:
   if (value)
-   opts->x_flag_wrapv = 0;
+   {
+ opts->x_flag_wrapv = 0;
+ opts->x_flag_sanitize
+   = parse_sanitizer_options ("signed-integer-overflow",
+  loc, code, opts->x_flag_sanitize,
+  value, false);
+ if (!opts_set->x_flag_sanitize_undefined_trap_on_error)
+   opts->x_flag_sanitize_undefined_trap_on_error = 1;
+ /* This keeps overflow undefined and not trap.  Specifically
+it does no longer allow to catch exceptions together with
+-fnon-call-exceptions.  It also makes -ftrapv cease to
+work with non-C-family languages since ubsan only works for
+those.  */
+ opts->x_flag_trapv = 0;
+   }
   break;
 
 case OPT_fstrict_overflow:
-- 
2.31.1


Re: [PATCH 1v2/3][vect] Add main vectorized loop unrolling

2021-10-20 Thread Andre Vieira (lists) via Gcc-patches

On 15/10/2021 09:48, Richard Biener wrote:

On Tue, 12 Oct 2021, Andre Vieira (lists) wrote:


Hi Richi,

I think this is what you meant, I now hide all the unrolling cost calculations
in the existing target hooks for costs. I did need to adjust 'finish_cost' to
take the loop_vinfo so the target's implementations are able to set the newly
renamed 'suggested_unroll_factor'.

Also added the checks for the epilogue's VF.

Is this more like what you had in mind?

Not exactly (sorry..).  For the target hook I think we don't want to
pass vec_info but instead another output parameter like the existing
ones.

vect_estimate_min_profitable_iters should then via
vect_analyze_loop_costing and vect_analyze_loop_2 report the unroll
suggestion to vect_analyze_loop which should then, if the suggestion
was > 1, instead of iterating to the next vector mode run again
with a fixed VF (old VF times suggested unroll factor - there's
min_vf in vect_analyze_loop_2 which we should adjust to
the old VF times two for example and maybe store the suggested
factor as hint) - if it succeeds the result will end up in the
list of considered modes (where we now may have more than one
entry for the same mode but a different VF), we probably want to
only consider more unrolling once.

For simplicity I'd probably set min_vf = max_vf = old VF * suggested
factor, thus take the targets request literally.

Richard.


Hi,

I now pass an output parameter to finish_costs and route it through the 
various calls up to vect_analyze_loop.  I tried to rework 
vect_determine_vectorization_factor and noticed that merely setting 
min_vf and max_vf is not enough, we only use these to check whether the 
vectorization factor is within range, well actually we only use max_vf 
at that stage. We only seem to use 'min_vf' to make sure the 
data_references are valid.  I am not sure my changes are the most 
appropriate here, for instance I am pretty sure the checks for max and 
min vf I added in vect_determine_vectorization_factor are currently 
superfluous as they will pass by design, but thought they might be good 
future proofing?


Also I changed how we compare against max_vf, rather than relying on the 
'MAX_VECTORIZATION' I decided to use the estimated_poly_value with 
POLY_VALUE_MAX, to be able to bound it further in case we have knowledge 
of the VL. I am not entirely sure about the validity of this change, maybe we 
are better off keeping the MAX_VECTORIZATION in place and not making any 
changes to max_vf for unrolling.


What do you think?
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
36519ccc5a58abab483c38d0a6c5f039592bfc7f..9b1e01e9b62050d7e34bc55454771e40bdbdb4cb
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -15972,8 +15972,8 @@ aarch64_adjust_body_cost (aarch64_vector_costs *costs, 
unsigned int body_cost)
 
 /* Implement TARGET_VECTORIZE_FINISH_COST.  */
 static void
-aarch64_finish_cost (void *data, unsigned *prologue_cost,
-unsigned *body_cost, unsigned *epilogue_cost)
+aarch64_finish_cost (void *data, unsigned *prologue_cost, unsigned *body_cost,
+unsigned *epilogue_cost, unsigned *suggested_unroll_factor)
 {
   auto *costs = static_cast (data);
   *prologue_cost = costs->region[vect_prologue];
@@ -15984,6 +15984,9 @@ aarch64_finish_cost (void *data, unsigned 
*prologue_cost,
   && costs->vec_flags
   && aarch64_use_new_vector_costs_p ())
 *body_cost = aarch64_adjust_body_cost (costs, *body_cost);
+
+  if(suggested_unroll_factor)
+*suggested_unroll_factor = 1;
 }
 
 /* Implement TARGET_VECTORIZE_DESTROY_COST_DATA.  */
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 
afc2674d49da370ae0f5ef277df7e9954f303b8e..a48e43879512793907fef946c1575c3ed7f68092
 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -23048,13 +23048,15 @@ ix86_add_stmt_cost (class vec_info *vinfo, void 
*data, int count,
 /* Implement targetm.vectorize.finish_cost.  */
 
 static void
-ix86_finish_cost (void *data, unsigned *prologue_cost,
- unsigned *body_cost, unsigned *epilogue_cost)
+ix86_finish_cost (void *data, unsigned *prologue_cost, unsigned *body_cost,
+ unsigned *epilogue_cost, unsigned *suggested_unroll_factor)
 {
   unsigned *cost = (unsigned *) data;
   *prologue_cost = cost[vect_prologue];
   *body_cost = cost[vect_body];
   *epilogue_cost = cost[vect_epilogue];
+  if (suggested_unroll_factor)
+*suggested_unroll_factor = 1;
 }
 
 /* Implement targetm.vectorize.destroy_cost_data.  */
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 
ad81dfb316dff00cde810d6b1edd31fa49d5c1e8..59d30ad6fcd1758383c52e34a0f90a126c501ec3
 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -5551,8 +5551,8 @@ rs6000_adjust_vect_cost_per_loop (rs6000_cost_data *data)
 /* Implement targetm.vectorize.finish_cost.  */
 
 static void
-rs6000_finish_cost (v

[PATCH] calls.c: Remove some dead code and target hooks

2021-10-20 Thread Alex Coplan via Gcc-patches
Hi all,

Looking at calls.c:initialize_argument_information, I spotted some dead
code that seems to have been left behind from when MPX support was
removed.

This patch removes that code as well as the associated target hooks
(which appear to be unused).

Bootstrapped and regtested on aarch64-linux-gnu and x86_64-linux-gnu, no
regressions.

OK for trunk?

Thanks,
Alex

gcc/ChangeLog:

* calls.c (initialize_argument_information): Remove some dead
code, remove handling for function_arg returning const_int.
* doc/tm.texi: Delete documentation for unused target hooks.
* doc/tm.texi.in: Likewise.
* target.def (load_bounds_for_arg): Delete.
(store_bounds_for_arg): Delete.
(load_returned_bounds): Delete.
(store_returned_bounds): Delete.
* targhooks.c (default_load_bounds_for_arg): Delete.
(default_store_bounds_for_arg): Delete.
(default_load_returned_bounds): Delete.
(default_store_returned_bounds): Delete.
* targhooks.h (default_load_bounds_for_arg): Delete.
(default_store_bounds_for_arg): Delete.
(default_load_returned_bounds): Delete.
(default_store_returned_bounds): Delete.
diff --git a/gcc/calls.c b/gcc/calls.c
index e50d3fc3b62..27b59f26ad3 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -1287,8 +1287,6 @@ initialize_argument_information (int num_actuals 
ATTRIBUTE_UNUSED,
   args_size->constant = 0;
   args_size->var = 0;
 
-  bitmap_obstack_initialize (NULL);
-
   /* In this loop, we consider args in the order they are written.
  We fill up ARGS from the back.  */
 
@@ -1297,7 +1295,6 @@ initialize_argument_information (int num_actuals 
ATTRIBUTE_UNUSED,
 int j = i;
 call_expr_arg_iterator iter;
 tree arg;
-bitmap slots = NULL;
 
 if (struct_value_addr_value)
   {
@@ -1324,13 +1321,8 @@ initialize_argument_information (int num_actuals 
ATTRIBUTE_UNUSED,
j--;
argpos++;
   }
-
-if (slots)
-  BITMAP_FREE (slots);
   }
 
-  bitmap_obstack_release (NULL);
-
   /* I counts args in order (to be) pushed; ARGPOS counts in order written.  */
   for (argpos = 0; argpos < num_actuals; i--, argpos++)
 {
@@ -1493,9 +1485,6 @@ initialize_argument_information (int num_actuals 
ATTRIBUTE_UNUSED,
 
   args[i].reg = targetm.calls.function_arg (args_so_far, arg);
 
-  if (args[i].reg && CONST_INT_P (args[i].reg))
-   args[i].reg = NULL;
-
   /* If this is a sibling call and the machine has register windows, the
 register window has to be unwinded before calling the routine, so
 arguments have to go into the incoming registers.  */
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 902402d7503..990152f5b15 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -4079,12 +4079,6 @@ The return value is usually either a @code{reg} RTX for 
the hard
 register in which to pass the argument, or zero to pass the argument
 on the stack.
 
-The return value can be a @code{const_int} which means argument is
-passed in a target specific slot with specified number.  Target hooks
-should be used to store or load argument in such case.  See
-@code{TARGET_STORE_BOUNDS_FOR_ARG} and @code{TARGET_LOAD_BOUNDS_FOR_ARG}
-for more information.
-
 The value of the expression can also be a @code{parallel} RTX@.  This is
 used when an argument is passed in multiple locations.  The mode of the
 @code{parallel} should be the mode of the entire argument.  The
@@ -5430,37 +5424,6 @@ defined, then define this hook to return @code{true} if
 Otherwise, you should not define this hook.
 @end deftypefn
 
-@deftypefn {Target Hook} rtx TARGET_LOAD_BOUNDS_FOR_ARG (rtx @var{slot}, rtx 
@var{arg}, rtx @var{slot_no})
-This hook is used by expand pass to emit insn to load bounds of
-@var{arg} passed in @var{slot}.  Expand pass uses this hook in case
-bounds of @var{arg} are not passed in register.  If @var{slot} is a
-memory, then bounds are loaded as for regular pointer loaded from
-memory.  If @var{slot} is not a memory then @var{slot_no} is an integer
-constant holding number of the target dependent special slot which
-should be used to obtain bounds.  Hook returns RTX holding loaded bounds.
-@end deftypefn
-
-@deftypefn {Target Hook} void TARGET_STORE_BOUNDS_FOR_ARG (rtx @var{arg}, rtx 
@var{slot}, rtx @var{bounds}, rtx @var{slot_no})
-This hook is used by expand pass to emit insns to store @var{bounds} of
-@var{arg} passed in @var{slot}.  Expand pass uses this hook in case
-@var{bounds} of @var{arg} are not passed in register.  If @var{slot} is a
-memory, then @var{bounds} are stored as for regular pointer stored in
-memory.  If @var{slot} is not a memory then @var{slot_no} is an integer
-constant holding number of the target dependent special slot which
-should be used to store @var{bounds}.
-@end deftypefn
-
-@deftypefn {Target Hook} rtx TARGET_LOAD_RETURNED_BOUNDS (rtx @var{slot})
-This hook is used by expand pass to emit insn to load bounds
-r

Re: [PATCH] Try to resolve paths in threader without looking further back.

2021-10-20 Thread Martin Sebor via Gcc-patches

On 10/20/21 4:28 AM, Aldy Hernandez via Gcc-patches wrote:

Sometimes we can solve a candidate path without having to recurse
further back.  This can mostly happen in fully resolving mode, because
we can ask the ranger what the range on entry to the path is, but
there's no reason this can't always apply.  This one-liner removes
the fully-resolving restriction.

I'm tickled pink to see how many things we now get quite early
in the compilation.  I actually had to disable jump threading entirely
for a few tests because the early threader was catching things
disturbingly early.  Also, as Richi predicted, I saw a lot of pre-VRP
cleanups happening.

I was going to commit this as obvious, but I think the test changes
merit discussion.

We've been playing games with gcc.dg/tree-ssa/ssa-thread-11.c for quite
some time.  Every time a threading pass gets smarter, we push the
check further down the pipeline.  We've officially run out of dumb
threading passes to disable ;-).  In the last year we've gone up from a
handful of threads, to 34 threads with the current combination of
options.  I doubt this is testing anything useful any more, so I've
removed it.

Similarly for gcc.dg/tree-ssa/ssa-dom-thread-4.c.  We used to thread 3
jump threads, but they were disallowed because of loop rotation.  Then
we started catching more jump threads in VRP2 threading so we tested
there.  With this patch though, we triple the number of threads found
from 11 to 31.  I believe this test has outlived its usefulness, and
I've removed it.  Note that even though we have these outrageous
possibilities for this test, the block copier ultimately chops them
down (23 survive though).

Likewise for ssa-dom-thread-7.c.  The number of threads in this test has
been growing consistently over the years.  There's no way to test
what is possible, especially because improvements in one threader open
up possibilities for another.  With this patch we're up to 41 registered
jump threads and they're spread over 4 passes.  There's no way to get the
amount right, and this test has become a source of useless busywork.

All in all, I believe the simpler jump threading tests, as well as the
gimple FE tests I've added, more than adequately cover us.

Tested on x86-64 Linux.

OK for trunk?

p.s. As usual, some warning pass gets thrown off.  Martin, I've XFAILed
it.


I appreciate the heads up.  I'm happy that the threader has
improved.  I'm obviously not pleased that it has led to regressions
in warnings but I understand that in some cases they might be due
to limitations in the warning code.  I think the test case you have
xfailed might be one such example.  The uninitialized warnings are
exquisitely sensitive to these types of changes.  If/when this patch
is applied please reopen PR 89230 and reference this commit.

Having said that, to maintain the quality of diagnostics,
the work that goes into these nice optimizer improvements needs
to be balanced by an effort to either update the warning code
to cope with the IL changes, or the optimizers need to take care
to avoid exposing undefined code that the warnings are designed
to detect.  I'm concerned not just that the quality of GCC 12
diagnostics has been eroding, but also that it seems to be not
just acceptable but expected.

Martin



gcc/ChangeLog:

* tree-ssa-threadbackward.c (back_threader::find_paths_to_names):
Always try to resolve path without looking back.

gcc/testsuite/ChangeLog:

* gcc.dg/graphite/scop-dsyr2k-2.c: Adjust for jump threading changes.
* gcc.dg/graphite/scop-dsyr2k.c: Same.
* gcc.dg/graphite/scop-dsyrk-2.c: Same.
* gcc.dg/graphite/scop-dsyrk.c: Same.
* gcc.dg/tree-ssa/pr20701.c: Same.
* gcc.dg/tree-ssa/pr20702.c: Same.
* gcc.dg/tree-ssa/pr21086.c: Same.
* gcc.dg/tree-ssa/pr25382.c: Same.
* gcc.dg/tree-ssa/pr58480.c: Same.
* gcc.dg/tree-ssa/ssa-vrp-thread-1.c: Same.
* gcc.dg/tree-ssa/vrp08.c: Same.
* gcc.dg/tree-ssa/vrp55.c: Same.
* gcc.dg/tree-ssa/ssa-dom-thread-4.c: Removed.
* gcc.dg/tree-ssa/ssa-dom-thread-7.c: Removed.
* gcc.dg/tree-ssa/ssa-thread-11.c: Removed.
* gcc.dg/uninit-pr89230-1.c: xfail.
---
  gcc/testsuite/gcc.dg/graphite/scop-dsyr2k-2.c |   1 +
  gcc/testsuite/gcc.dg/graphite/scop-dsyr2k.c   |   1 +
  gcc/testsuite/gcc.dg/graphite/scop-dsyrk-2.c  |   1 +
  gcc/testsuite/gcc.dg/graphite/scop-dsyrk.c|   1 +
  gcc/testsuite/gcc.dg/tree-ssa/pr20701.c   |   2 +-
  gcc/testsuite/gcc.dg/tree-ssa/pr20702.c   |   2 +-
  gcc/testsuite/gcc.dg/tree-ssa/pr21086.c   |   2 +-
  gcc/testsuite/gcc.dg/tree-ssa/pr25382.c   |   2 +-
  gcc/testsuite/gcc.dg/tree-ssa/pr58480.c   |   2 +-
  .../gcc.dg/tree-ssa/ssa-dom-thread-4.c|  60 
  .../gcc.dg/tree-ssa/ssa-dom-thread-7.c| 134 --
  gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-11.c |  50 ---
  .../gcc.dg/tree-ssa/ssa-vrp-thread-1.c|  

Re: [PATCH] AArch64: Improve address rematerialization costs

2021-10-20 Thread Wilco Dijkstra via Gcc-patches
ping


From: Wilco Dijkstra
Sent: 02 June 2021 11:21
To: GCC Patches 
Cc: Kyrylo Tkachov ; Richard Sandiford 

Subject: [PATCH] AArch64: Improve address rematerialization costs 
 
Hi,

Given the large improvements from better register allocation of GOT accesses,
I decided to generalize it to get large gains for normal addressing too:

Improve rematerialization costs of addresses.  The current costs are set too 
high
which results in extra register pressure and spilling.  Using lower costs means
addresses will be rematerialized more often rather than being spilled or causing
spills.  This results in significant codesize reductions and performance gains.
SPECINT2017 improves by 0.27% with LTO and 0.16% without LTO.  Codesize is 0.12%
smaller.

Passes bootstrap and regress. OK for commit?

ChangeLog:
2021-06-01  Wilco Dijkstra  

    * config/aarch64/aarch64.c (aarch64_rtx_costs): Use better 
rematerialization
    costs for HIGH, LO_SUM and SYMBOL_REF.

---

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
641c83b479e76cbcc75b299eb7ae5f634d9db7cd..08245827daa3f8199b29031e754244c078f0f500
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -13444,45 +13444,22 @@ cost_plus:
   return false;  /* All arguments need to be in registers.  */
 }
 
-    case SYMBOL_REF:
+    /* The following costs are used for rematerialization of addresses.
+   Set a low cost for all global accesses - this ensures they are
+   preferred for rematerialization, blocks them from being spilled
+   and reduces register pressure.  The result is significant codesize
+   reductions and performance gains. */
 
-  if (aarch64_cmodel == AARCH64_CMODEL_LARGE
- || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
-   {
- /* LDR.  */
- if (speed)
-   *cost += extra_cost->ldst.load;
-   }
-  else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
-  || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
-   {
- /* ADRP, followed by ADD.  */
- *cost += COSTS_N_INSNS (1);
- if (speed)
-   *cost += 2 * extra_cost->alu.arith;
-   }
-  else if (aarch64_cmodel == AARCH64_CMODEL_TINY
-  || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
-   {
- /* ADR.  */
- if (speed)
-   *cost += extra_cost->alu.arith;
-   }
-
-  if (flag_pic)
-   {
- /* One extra load instruction, after accessing the GOT.  */
- *cost += COSTS_N_INSNS (1);
- if (speed)
-   *cost += extra_cost->ldst.load;
-   }
+    case SYMBOL_REF:
+  *cost = 0;
   return true;
 
 case HIGH:
+  *cost = 0;
+  return true;
+
 case LO_SUM:
-  /* ADRP/ADD (immediate).  */
-  if (speed)
-   *cost += extra_cost->alu.arith;
+  *cost = COSTS_N_INSNS (3) / 4;
   return true;
 
 case ZERO_EXTRACT:


Re: [PATCH v3] AArch64: Improve GOT addressing

2021-10-20 Thread Wilco Dijkstra via Gcc-patches
ping


From: Wilco Dijkstra
Sent: 04 June 2021 14:44
To: Richard Sandiford 
Cc: Kyrylo Tkachov ; GCC Patches 

Subject: [PATCH v3] AArch64: Improve GOT addressing 
 
Hi Richard,

This merges the v1 and v2 patches and removes the spurious MEM from
ldr_got_small_si/di. This has been rebased after [1], and the performance
gain has now doubled.

[1] https://gcc.gnu.org/pipermail/gcc-patches/2021-June/571708.html

Improve GOT addressing by treating the instructions as a pair.  This reduces
register pressure and improves code quality significantly.  SPECINT2017 improves
by 0.6% with -fPIC and codesize is 0.73% smaller.  Perlbench has 0.9% smaller
codesize, 1.5% fewer executed instructions and is 1.8% faster on Neoverse N1.

Passes bootstrap and regress. OK for commit?

ChangeLog:
2021-06-04  Wilco Dijkstra  

    * config/aarch64/aarch64.md (movsi): Split GOT accesses after reload.
    (movdi): Likewise.
    (ldr_got_small_): Remove MEM and LO_SUM, emit ADRP+LDR GOT 
sequence.
    (ldr_got_small_sidi): Likewise.
    * config/aarch64/aarch64.c (aarch64_load_symref_appropriately): Delay
    splitting of GOT accesses until after reload. Remove tmp_reg and MEM.
    (aarch64_print_operand): Correctly print got_lo12 in L specifier.
    (aarch64_rtx_costs): Set rematerialization cost for GOT accesses.
    (aarch64_mov_operand_p): Make GOT accesses valid move operands.

---

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
08245827daa3f8199b29031e754244c078f0f500..11ea33c70fb06194fadfe94322fdfa098e5320fc
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3615,6 +3615,14 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
 
 case SYMBOL_SMALL_GOT_4G:
   {
+   /* Use movdi for GOT accesses until after reload - this improves
+  CSE and rematerialization.  */
+   if (!reload_completed)
+ {
+   emit_insn (gen_rtx_SET (dest, imm));
+   return;
+ }
+
 /* In ILP32, the mode of dest can be either SImode or DImode,
    while the got entry is always of SImode size.  The mode of
    dest depends on how dest is used: if dest is assigned to a
@@ -3624,34 +3632,21 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
    patterns here (two patterns for ILP32).  */
 
 rtx insn;
-   rtx mem;
-   rtx tmp_reg = dest;
 machine_mode mode = GET_MODE (dest);
 
-   if (can_create_pseudo_p ())
- tmp_reg = gen_reg_rtx (mode);
-
-   emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
 if (mode == ptr_mode)
   {
 if (mode == DImode)
- insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
+ insn = gen_ldr_got_small_di (dest, imm);
 else
- insn = gen_ldr_got_small_si (dest, tmp_reg, imm);
-
-   mem = XVECEXP (SET_SRC (insn), 0, 0);
+ insn = gen_ldr_got_small_si (dest, imm);
   }
 else
   {
 gcc_assert (mode == Pmode);
-
-   insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
-   mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
+   insn = gen_ldr_got_small_sidi (dest, imm);
   }
 
-   gcc_assert (MEM_P (mem));
-   MEM_READONLY_P (mem) = 1;
-   MEM_NOTRAP_P (mem) = 1;
 emit_insn (insn);
 return;
   }
@@ -11019,7 +11014,7 @@ aarch64_print_operand (FILE *f, rtx x, int code)
   switch (aarch64_classify_symbolic_expression (x))
 {
 case SYMBOL_SMALL_GOT_4G:
- asm_fprintf (asm_out_file, ":lo12:");
+ asm_fprintf (asm_out_file, ":got_lo12:");
   break;
 
 case SYMBOL_SMALL_TLSGD:
@@ -13452,6 +13447,12 @@ cost_plus:
 
 case SYMBOL_REF:
   *cost = 0;
+
+  /* Use a separate remateralization cost for GOT accesses.  */
+  if (aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC
+ && aarch64_classify_symbol (x, 0) == SYMBOL_SMALL_GOT_4G)
+   *cost = COSTS_N_INSNS (1) / 2;
+
   return true;
 
 case HIGH:
@@ -19907,6 +19908,11 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
   return aarch64_simd_valid_immediate (x, NULL);
 }
 
+  /* GOT accesses are valid moves until after regalloc.  */
+  if (SYMBOL_REF_P (x)
+  && aarch64_classify_symbolic_expression (x) == SYMBOL_SMALL_GOT_4G)
+    return true;
+
   x = strip_salt (x);
   if (SYMBOL_REF_P (x) && mode == DImode && CONSTANT_ADDRESS_P (x))
 return true;
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 
abfd84526745d029ad4953eabad6dd17b159a218..30effca6f3562f6870a6cc8097750e63bb0d424d
 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1283,8 +1283,11 @@ (define_insn_and_split "*movsi_aarch64"
    fmov\\t%w0, %s1
    fmov\\t%s0, %s1
    * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
-  "CONST_INT_P (operands[1]) &

Re: [PATCH] Try to resolve paths in threader without looking further back.

2021-10-20 Thread Aldy Hernandez via Gcc-patches
On Wed, Oct 20, 2021 at 4:35 PM Martin Sebor  wrote:

> I appreciate the heads up.  I'm happy that the threader has
> improved.  I'm obviously not pleased that it has led to regressions
> in warnings but I understand that in some cases they might be due
> to limitations in the warning code.  I think the test case you have
> xfailed might be one such example.  The uninitialized warnings are
> exquisitely sensitive to these types of changes.  If/when this patch
> is applied please reopen PR 89230 and reference this commit.
>
> Having said that, to maintain the quality of diagnostics,
> the work that goes into these nice optimizer improvements needs
> to be balanced by an effort to either update the warning code
> to cope with the IL changes, or the optimizers need to take care
> to avoid exposing undefined code that the warnings are designed
> to detect.  I'm concerned not just that the quality of GCC 12
> diagnostics has been eroding, but also that it seems to be not
> just acceptable but expected.

You make a very good point.  It is certainly not my intention to make
life difficult for the warning maintainers, but I'm afraid I don't
have sufficient knowledge in the area to improve them.

There may be some low hanging fruit though.  At least in the warnings
that use the ranger, there's no reason to run these passes so late in
the pipeline.  You could run the warning code as early as you want,
insofar as SSA is available and the CFG has been built.  Heck, you may
even be able to run at -O0, though we may need some sort of value
numbering.  I believe Richi even suggested this a while back.

Another thing you could do is rewrite your passes to use actual
ranges, not the pair of ranges that the sprintf code uses (for
example).  We've put a lot of work into making infinite ranges work.
There's no reason to keep using value_ranges, anti ranges, and all
that.  I've mentioned this many times in the past 2+ years.  I even
tried my hand at doing the conversion myself, but I'm afraid I don't
understand the intricacies of the code very well.  I've done all I can
in this area, including porting all the evrp consumers to the ranger,
and recently providing patches for converting the strlen / sprintf
passes.

I'm sorry to rant here, but I've made lots of suggestions in the past
couple years, and they go unheeded.  I eventually get frustrated and
end up doing a half-assed conversion of what I can, myself.

Furthermore, I've mentioned in the past that the sprintf warnings (and
possibly the overflow ones)  depend on precise internal ranges that
evrp or the ranger are calculating.  The fact that they're hard to
read, makes it less likely that others (well me, anyhow) will chime in
to help.  It's hard enough to understand the warnings, let alone the
code generating them.

Andrew has also suggested some things in this area.  ISTR there was
some overloading of the base functions you could do to represent
pointer offsets, though I can't remember the details.

Barring the above, at the top of our list for next year is full
support for pointers and floats.  The pointer bits should help, but
you must convert your passes for that to happen.  Pairs of integers
are far too fragile.

And finally, if none of that improves things sufficiently, Andrew and
I have batted around providing an infrastructure for predication that
can replace the uninit stuff which is showing its age, but alas that's
a ways away.

Aldy



(!HELP NEEDED) Where is the doc for the format strings in gcc (for example, %q+D, ...)

2021-10-20 Thread Qing Zhao via Gcc-patches
Hi,

In GCC, there are many utility routines for reporting error, warning, or 
information, for example:

warning (0, "weak declaration of %q+D not supported", decl);
warning_at (stmtloc, OPT_Wmaybe_uninitialized,  "%qE may be used 
uninitialized", ptr));
inform (loc, "in a call to %qT declared with " "attribute %<%s%>", fntype, 
access_str);
error ("%qD is unavailable: %s", node, (const char *) msg);

There are format-strings inside them, “%q+D”, “%qE”, “%qT”, “%qD”, etc, where 
can I find a doc for the details of
These format-strings? Or which source files I should read to understand the 
details?

Thanks a lot for your help.

Qing

Re: [PATCH] Restore --param=max-fsm-thread-length

2021-10-20 Thread Jan-Benedict Glaw
On Wed, 2021-10-20 09:43:42 +0200, Aldy Hernandez via Gcc-patches 
 wrote:
> The removal of --param=max-fsm-thread-length is causing code
> explosion.  I thought that --param=max-fsm-thread-path-insns was a
> better gauge for path profitability than raw BB length, but it turns
> out that we don't take into account PHIs when estimating the number of
> statements.
[...]
> 
> This fixes 102814, 102852, and I bet it solves the Linux kernel cross
> compile issue.

It does!

Thanks,
  Jan-Benedict

-- 


signature.asc
Description: PGP signature


Re: [PATCH, v2, OpenMP 5.2, Fortran] Strictly-structured block support for OpenMP directives

2021-10-20 Thread Jakub Jelinek via Gcc-patches
On Wed, Oct 20, 2021 at 08:30:34PM +0800, Chung-Lin Tang wrote:
> 2021-10-20  Chung-Lin Tang  
> 
> gcc/fortran/ChangeLog:
> 
>   * decl.c (gfc_match_end): Add COMP_OMP_STRICTLY_STRUCTURED_BLOCK case
>   together with COMP_BLOCK.
>   * parse.c (parse_omp_structured_block): Change return type to
>   'gfc_statement', add handling for strictly-structured block case, adjust
>   recursive calls to parse_omp_structured_block.
>   (parse_executable): Adjust calls to parse_omp_structured_block.
>   * parse.h (enum gfc_compile_state): Add
>   COMP_OMP_STRICTLY_STRUCTURED_BLOCK.
>   * trans-openmp.c (gfc_trans_omp_workshare): Add EXEC_BLOCK case
>   handling.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gfortran.dg/gomp/cancel-1.f90: Adjust testcase.
>   * gfortran.dg/gomp/nesting-3.f90: Adjust testcase.
>   * gfortran.dg/gomp/strictly-structured-block-1.f90: New test.
>   * gfortran.dg/gomp/strictly-structured-block-2.f90: New test.
>   * gfortran.dg/gomp/strictly-structured-block-3.f90: New test.
> 
> libgomp/ChangeLog:
> 
>   * libgomp.texi (Support of strictly structured blocks in Fortran):
>   Adjust to 'Y'.
>   * testsuite/libgomp.fortran/task-reduction-16.f90: Adjust testcase.

Thanks, looks mostly good now, but I still have nits for the testsuite.

> --- /dev/null
> +++ b/gcc/testsuite/gfortran.dg/gomp/strictly-structured-block-1.f90
> @@ -0,0 +1,211 @@
> +! { dg-do compile }
> +! { dg-options "-fopenmp" }
> +
> +program main
> +  integer :: x, i, n
> +
> +  !$omp parallel
> +  block
> +x = x + 1
> +  end block

I'd prefer not to use those x = j or x = x + 1 etc.
as statements that do random work here whenever possible.
While those are dg-do compile testcases, especially if
it is without dg-errors I think it is preferable not to show
bad coding examples.
E.g. the x = x + 1 above is wrong for 2 reasons, x is uninitialized
before the parallel, and there is a data race, the threads, teams etc.
can write to x concurrently.
I think better would be to use something like
call do_work
which doesn't have to be defined anywhere and will just stand there
as a black box for unspecified work.

> +  !$omp workshare
> +  block
> +x = x + 1
> +  end block

There are exceptions though, e.g. workshare is such a case, because
e.g. call do_work is not valid in workshare.
So, it is ok to keep using x = x + 1 here if you initialize it
first at the start of the program.

> +  !$omp workshare
> +  block
> +x = 1
> +!$omp critical
> +block
> +  x = 3
> +end block
> +  end block

And then there are cases like the above, please
just use different variables there (all initialized) or
say an array and access different elements in the different spots.

Jakub



Re: [PATCH, rs6000] Disable gimple fold for float or double vec_minmax when fast-math is not set

2021-10-20 Thread Segher Boessenkool
Hi!

On Wed, Oct 20, 2021 at 05:04:56PM +0800, HAO CHEN GUI wrote:
> This patch disables gimple folding for float or double vec_min/max when 
> fast-math is not set. It makes vec_min/max conform with the guide.
> 
> Bootstrapped and tested on powerpc64le-linux with no regressions. Is this 
> okay for trunk? Any recommendations? Thanks a lot.
> 
>   I refined the patch according to reviewers' advice. The attachments are 
> the ChangeLog and patch diff in case the email body is messed up.
> 
> 
> ChangeLog
> 
> 2021-10-20 Haochen Gui 
> 
> gcc/
>     * config/rs6000/rs6000-call.c (rs6000_gimple_fold_builtin):
>     Disable gimple fold for VSX_BUILTIN_XVMINDP, 
> ALTIVEC_BUILTIN_VMINFP,
>     VSX_BUILTIN_XVMAXDP, ALTIVEC_BUILTIN_VMAXFP when fast-math 
> is not
>     set.

Content-Type: text/plain; charset=UTF-8; format=flowed

Please don't use flowed.  It makes patches unreadable and unusable if
you do (they will not apply anymore).

Also, the left border should be one tab, not eight spaces, and the right
border is at 80 chars (so there are 72 usable chars on a line).  Don't
end a line in ":" if you don't overflow a line if you put text after it.

> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -12159,6 +12159,14 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator 
> *gsi)
>    return true;
>  /* flavors of vec_min.  */
>  case VSX_BUILTIN_XVMINDP:
> +    case ALTIVEC_BUILTIN_VMINFP:
> +  {
> +   lhs = gimple_call_lhs (stmt);
> +   tree type = TREE_TYPE (lhs);
> +   if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
> + return false;
> +   gcc_fallthrough ();
> +  }

Both vminfp and xvmindp (and xvminsp and xsmindp) return -0 or the
minimum of +0 and -0, that is okay even with HONOR_SIGNED_ZEROS, I
think?

x[sv]min[sd]p returns the number for the minimum of a NaN and a number,
but vminfp returns a NaN.  Do you really want to make the xvmindp
builtin handle less than it does currently?  And, what about vminfp?
Did that do the wrong thing before?

There are no tests for any of that apparently.  Hrm.


Segher


Re: [Version 2][Patch][PR102281]do not add BUILTIN_CLEAR_PADDING for variables that are gimple registers

2021-10-20 Thread Qing Zhao via Gcc-patches



> On Oct 18, 2021, at 2:26 PM, Qing Zhao via Gcc-patches 
>  wrote:
> 
> Hi, Jakub,
> 
> This is the 2nd version of the patch based on your comment.
> 
> Bootstrapped on both x86 and aarch64. Regression testings are ongoing.
The regression testing was done. Looks good.

Okay for committing?

Thanks.

Qing
> 
> Please let me know if this is ready for committing?
> 
> Thanks a lot.
> 
> Qing.
> 
> ==
> 
> From d6f60370dee69b5deb3d7ef51873a5e986490782 Mon Sep 17 00:00:00 2001
> From: Qing Zhao 
> Date: Mon, 18 Oct 2021 19:04:39 +
> Subject: [PATCH] PR 102281 (-ftrivial-auto-var-init=zero causes ice)
> 
> Do not add call to __builtin_clear_padding when a variable is a gimple
> register or it might not have padding.
> 
> gcc/ChangeLog:
> 
> 2021-10-18  qing zhao  
> 
>   * gimplify.c (gimplify_decl_expr): Do not add call to
>   __builtin_clear_padding when a variable is a gimple register
>   or it might not have padding.
>   (gimplify_init_constructor): Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
> 2021-10-18  qing zhao  
> 
>   * c-c++-common/pr102281.c: New test.
>   * gcc.target/i386/auto-init-2.c: Adjust testing case.
>   * gcc.target/i386/auto-init-4.c: Likewise.
>   * gcc.target/i386/auto-init-6.c: Likewise.
>   * gcc.target/aarch64/auto-init-6.c: Likewise.
> ---
> gcc/gimplify.c| 25 ++-
> gcc/testsuite/c-c++-common/pr102281.c | 17 +
> .../gcc.target/aarch64/auto-init-6.c  |  4 +--
> gcc/testsuite/gcc.target/i386/auto-init-2.c   |  2 +-
> gcc/testsuite/gcc.target/i386/auto-init-4.c   | 10 +++-
> gcc/testsuite/gcc.target/i386/auto-init-6.c   |  7 +++---
> 6 files changed, 47 insertions(+), 18 deletions(-)
> create mode 100644 gcc/testsuite/c-c++-common/pr102281.c
> 
> diff --git a/gcc/gimplify.c b/gcc/gimplify.c
> index d8e4b139349..b27dc0ed308 100644
> --- a/gcc/gimplify.c
> +++ b/gcc/gimplify.c
> @@ -1784,8 +1784,8 @@ gimple_add_init_for_auto_var (tree decl,
>that padding is initialized to zero. So, we always initialize paddings
>to zeroes regardless INIT_TYPE.
>To do the padding initialization, we insert a call to
> -   __BUILTIN_CLEAR_PADDING (&decl, 0, for_auto_init = true).
> -   Note, we add an additional dummy argument for __BUILTIN_CLEAR_PADDING,
> +   __builtin_clear_padding (&decl, 0, for_auto_init = true).
> +   Note, we add an additional dummy argument for __builtin_clear_padding,
>'for_auto_init' to distinguish whether this call is for automatic
>variable initialization or not.
>*/
> @@ -1954,8 +1954,14 @@ gimplify_decl_expr (tree *stmt_p, gimple_seq *seq_p)
>pattern initialization.
>In order to make the paddings as zeroes for pattern init, We
>should add a call to __builtin_clear_padding to clear the
> -  paddings to zero in compatiple with CLANG.  */
> -   if (flag_auto_var_init == AUTO_INIT_PATTERN)
> +  paddings to zero in compatiple with CLANG.
> +  We cannot insert this call if the variable is a gimple register
> +  since __builtin_clear_padding will take the address of the
> +  variable.  As a result, if a long double/_Complex long double
> +  variable will spilled into stack later, its padding is 0XFE.  */
> +   if (flag_auto_var_init == AUTO_INIT_PATTERN
> +   && !is_gimple_reg (decl)
> +   && clear_padding_type_may_have_padding_p (TREE_TYPE (decl)))
>   gimple_add_padding_init_for_auto_var (decl, is_vla, seq_p);
>   }
> }
> @@ -5384,12 +5390,19 @@ gimplify_init_constructor (tree *expr_p, gimple_seq 
> *pre_p, gimple_seq *post_p,
> 
>   /* If the user requests to initialize automatic variables, we
>  should initialize paddings inside the variable.  Add a call to
> - __BUILTIN_CLEAR_PADDING (&object, 0, for_auto_init = true) to
> + __builtin_clear_pading (&object, 0, for_auto_init = true) to
>  initialize paddings of object always to zero regardless of
>  INIT_TYPE.  Note, we will not insert this call if the aggregate
>  variable has be completely cleared already or it's initialized
> - with an empty constructor.  */
> + with an empty constructor.  We cannot insert this call if the
> + variable is a gimple register since __builtin_clear_padding will take
> + the address of the variable.  As a result, if a long double/_Complex 
> long
> + double variable will be spilled into stack later, its padding cannot
> + be cleared with __builtin_clear_padding.  We should clear its padding
> + when it is spilled into memory.  */
>   if (is_init_expr
> +  && !is_gimple_reg (object)
> +  && clear_padding_type_may_have_padding_p (type)
>   && ((AGGREGATE_TYPE_P (type) && !cleared && !is_empty_ctor)
> || !AGGREGATE_TYPE_P (type))
>   && is_var_need_auto_init (object))
> diff --git a/gcc/testsuite/c-c++-common/pr102281.c 
> b/gcc/testsuite/c-

Re: [PATCH 6/8] tree-dynamic-object-size: Handle function parameters

2021-10-20 Thread Martin Sebor via Gcc-patches

On 10/7/21 4:14 PM, Siddhesh Poyarekar wrote:

Handle either static sizes in function parameters or hints provided by
__attribute__ ((access (...))) to compute sizes for objects.


It's been my hope to eventually teach __builtin_object_size about
attribute access but implementing it in the new built-in might be
preferable.  Glad to see you noticed it and took advantage of it!

Does this include handling "VLA function parameters" as in

  void f (int n, char d[n]);   // (or char d[static n])

I don't see tests for it in this patch but since internally, GCC
describes VLA (and array) function arguments using attribute access
hanndling it should automatically give us VLA (and array) support
as well unless we disable it, either intentionally or by accident.
Either way, I would recommend adding tests for VLA parameters.

Martin



gcc/ChangeLog:

* tree-dynamic-object-size.c: Include tree-dfa.h.
(emit_size_stmts): New argument osi.  Handle GIMPLE_NOP.
(eval_size_expr, gimplify_size_exprs): Adjust.
(parm_object_size): New function.
(collect_object_sizes_for): Handle GIMPLE_NOP.

gcc/testsuite/ChangeLog:

* gcc.dg/builtin-dynamic-object-size-0.c (test_parmsz): New
test.
(main): Call it.

Signed-off-by: Siddhesh Poyarekar 
---
  .../gcc.dg/builtin-dynamic-object-size-0.c| 22 +
  gcc/tree-dynamic-object-size.c| 98 +--
  2 files changed, 112 insertions(+), 8 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c 
b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c
index 3c2c4c84264..c72fa0508db 100644
--- a/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c
+++ b/gcc/testsuite/gcc.dg/builtin-dynamic-object-size-0.c
@@ -255,6 +255,15 @@ test_substring_ptrplus (size_t sz, size_t off)
return __builtin_dynamic_object_size (str + off, 0);
  }
  
+size_t

+__attribute__ ((noinline))


I think attribute noipa might be a better choice if the goal
is to keep GCC from sneaking in data from the caller and so
defeating the purpose of the test.

Martin


+__attribute__ ((access (__read_write__, 1, 2)))
+test_parmsz (void *obj, size_t sz, size_t off)
+{
+  return __builtin_dynamic_object_size (obj + off, 0);
+}
+
+
  int
  main (int argc, char **argv)
  {
@@ -338,6 +347,19 @@ main (int argc, char **argv)
if (test_deploop (128, 129) != 32)
  FAIL ();
  
+  if (test_parmsz (argv[0], __builtin_strlen (argv[0]) + 1, -1)!= 0)

+FAIL ();
+
+  if (test_parmsz (argv[0], __builtin_strlen (argv[0]) + 1, 0)
+  != __builtin_strlen (argv[0]) + 1)
+FAIL ();
+  if (test_parmsz (argv[0], __builtin_strlen (argv[0]) + 1,
+  __builtin_strlen (argv[0]))!= 1)
+FAIL ();
+  if (test_parmsz (argv[0], __builtin_strlen (argv[0]) + 1,
+  __builtin_strlen (argv[0]) + 2)!= 0)
+FAIL ();
+
if (nfails > 0)
  __builtin_abort ();
  
diff --git a/gcc/tree-dynamic-object-size.c b/gcc/tree-dynamic-object-size.c

index f143a64777c..8d7283623dc 100644
--- a/gcc/tree-dynamic-object-size.c
+++ b/gcc/tree-dynamic-object-size.c
@@ -58,6 +58,7 @@ along with GCC; see the file COPYING3.  If not see
  #include "gimple-fold.h"
  #include "gimple-iterator.h"
  #include "tree-cfg.h"
+#include "tree-dfa.h"
  #include "stringpool.h"
  #include "attribs.h"
  #include "builtins.h"
@@ -456,7 +457,7 @@ pass_through_call (const gcall *call)
  
  
  static void

-emit_size_stmts (gimple *stmt, tree wholesize_ssa,
+emit_size_stmts (object_size_info *osi, gimple *stmt, tree wholesize_ssa,
 tree wholesize_expr, tree size_ssa, tree size_expr)
  {
gimple_seq seq = NULL;
@@ -481,7 +482,14 @@ emit_size_stmts (gimple *stmt, tree wholesize_ssa,
   statements involved in evaluation of the object size expression precede
   the definition statement.  For parameters, we don't have a definition
   statement, so insert into the first code basic block.  */
-  gimple_stmt_iterator i = gsi_for_stmt (stmt);
+  gimple_stmt_iterator i;
+  if (gimple_code (stmt) == GIMPLE_NOP)
+{
+  basic_block first_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (osi->fun));
+  i = gsi_start_bb (first_bb);
+}
+  else
+i = gsi_for_stmt (stmt);
gsi_insert_seq_before (&i, seq, GSI_CONTINUE_LINKING);
  }
  
@@ -542,8 +550,8 @@ size_bound_expr (tree sz)

  }
  
  static void

-eval_size_expr (tree var, tree wholesize, tree *wholesize_expr,
-   tree size, tree *size_expr)
+eval_size_expr (struct object_size_info *osi, tree var, tree wholesize,
+   tree *wholesize_expr, tree size, tree *size_expr)
  {
if (size_expr != NULL)
  {
@@ -560,7 +568,7 @@ eval_size_expr (tree var, tree wholesize, tree 
*wholesize_expr,
}
else
{
- emit_size_stmts (stmt, wholesize, *wholesize_expr, size,
+ emit_size_stmts (osi, stmt, wholesize, *wholesize_expr, size,
   size_bound_expr (*size_expr));
  d

Re: [PATCH] X86: Add an option -muse-unaligned-vector-move

2021-10-20 Thread Richard Biener via Gcc-patches
On October 20, 2021 3:19:28 PM GMT+02:00, "H.J. Lu"  wrote:
>On Wed, Oct 20, 2021 at 4:18 AM Richard Biener
> wrote:
>>
>> On Wed, Oct 20, 2021 at 12:40 PM Xu Dianhong  wrote:
>> >
>> > Many thanks for your explanation. I got the meaning of operands.
>> > The "addpd b(%rip), %xmm0" instruction needs "b(%rip)" aligned otherwise 
>> > it will rise a "Real-Address Mode Exceptions".
>> > I haven't considered this situation  "b(%rip)" has an address dependence 
>> > of "a(%rip)" before. I think this situation could be resolved on the 
>> > assembler side except for this dummy code like "movapd 
>> > 0x200b37(%rip),%xmm1, ... addpd  0x200b37(%rip),%xmm0 ".
>>
>> Of course the compiler will only emit instructions which have the
>> constraint of aligned memory
>> when the memory is known to be aligned.  That's why I wonder why you
>> would need such an
>> option.  "Real-Address Mode Exceptions" may point to the issue, but I
>> wonder what's different
>> in real mode vs. protected mode - even with segmentation the alignment
>> of objects should
>> prevail unless you play linker"tricks" that make global objects have
>> different alignment - but
>> then it's better to adjust the respective hooks to not falsely claim
>> such alignment.  Consider
>> for example
>>
>>if ((uintptr_t)&a & 0x7)
>>  foo();
>>   else
>>  bar();
>>
>> GCC will optimize the branch statically to always call foo if 'a'
>> appears to be aligned,
>> even if you later try to "override" this with an option.  Alignment is
>> not only about
>> moves, it's also about knowledge about low bits in addresses and about
>> alias analysis where alignment constrains how two objects can overlap.
>>
>> So - do not lie to the compiler!  A late "workaround" avoiding aligned
>> SSE moves isn't a proper fix.
>>
>
>The motivations are
>
>1.  AVX non-load/store ops work on unaligned memory.   Unaligned
>load/store on aligned memory is as fast as aligned load/store on Intel
>AVX machines.   The new switch makes load/store consistent with
>other AVX ops.
>2. We don't properly align the stack for AVX on Windows.  This can
>be used as a workaround for -mavx on Windows.

But this, with lying that the stack is aligned, causes all of the above 
mentioned issues and thus needs to be fixed by either properly aligning the 
stack or not lying to the compiler that we do.

>
>We can change TARGET_USE_UNALIGNED_VECTOR_MOVE
>to require AVX.

But such a workaround does not make any sense since it does not fix the 
fundamental underlying problem. 

Richard. 

>



[PATCH] libstdc++: Implement LWG 3481 change to ranges::viewable_range

2021-10-20 Thread Patrick Palka via Gcc-patches
Tested on x86_64-pc-linux-gnu, does this look OK for trunk, and branches
after a while?

libstdc++-v3/ChangeLog:

* include/bits/ranges_base.h (viewable_range): Adjust as per
LWG 3481.
* testsuite/std/ranges/adaptors/all.cc (test07): New test.
---
 libstdc++-v3/include/bits/ranges_base.h   |  3 ++-
 libstdc++-v3/testsuite/std/ranges/adaptors/all.cc | 15 +++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/bits/ranges_base.h 
b/libstdc++-v3/include/bits/ranges_base.h
index 7801b2fd023..43b0b9f7bf3 100644
--- a/libstdc++-v3/include/bits/ranges_base.h
+++ b/libstdc++-v3/include/bits/ranges_base.h
@@ -688,7 +688,8 @@ namespace ranges
   /// A range which can be safely converted to a view.
   template
 concept viewable_range = range<_Tp>
-  && (borrowed_range<_Tp> || view>);
+  && ((view> && 
constructible_from, _Tp>)
+ || (!view> && borrowed_range<_Tp>));
 
   // [range.iter.ops] range iterator operations
 
diff --git a/libstdc++-v3/testsuite/std/ranges/adaptors/all.cc 
b/libstdc++-v3/testsuite/std/ranges/adaptors/all.cc
index 9a6a31e6cb4..ce1384c2890 100644
--- a/libstdc++-v3/testsuite/std/ranges/adaptors/all.cc
+++ b/libstdc++-v3/testsuite/std/ranges/adaptors/all.cc
@@ -159,6 +159,20 @@ test06()
   static_assert(!noexcept(views::all(BorrowedRange(x;
 }
 
+void
+test07()
+{
+  // LWG 3481
+  struct view_t : ranges::empty_view { // move-only view
+view_t(const view_t&) = delete;
+view_t(view_t&&) = default;
+view_t& operator=(const view_t&) = delete;
+view_t& operator=(view_t&&) = default;
+  };
+  static_assert(std::movable && !std::copyable);
+  static_assert(!ranges::viewable_range);
+}
+
 int
 main()
 {
@@ -168,4 +182,5 @@ main()
   static_assert(test04());
   test05();
   test06();
+  test07();
 }
-- 
2.33.1.711.g9d530dc002



[PATCH] libstdc++: Implement LWG 3535 changes to ranges::join_view

2021-10-20 Thread Patrick Palka via Gcc-patches
Tested on x86_64-pc-linux-gnu, does this look OK for trunk, and branches
after a while?

libstdc++-v3/ChangeLog:

* include/std/ranges (join_view::__iter_cat::_S_iter_cat): Adjust
criteria for returning bidirectional_iterator_tag as per LWG3535.
(join_view::_Iterator::_S_iter_concept): Likewise.
---
 libstdc++-v3/include/std/ranges | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges
index 273699aa790..c1519c7dbd5 100644
--- a/libstdc++-v3/include/std/ranges
+++ b/libstdc++-v3/include/std/ranges
@@ -2523,7 +2523,8 @@ namespace views::__adaptor
using _OuterCat = typename 
iterator_traits<_Outer_iter>::iterator_category;
using _InnerCat = typename 
iterator_traits<_Inner_iter>::iterator_category;
if constexpr (derived_from<_OuterCat, bidirectional_iterator_tag>
- && derived_from<_InnerCat, 
bidirectional_iterator_tag>)
+ && derived_from<_InnerCat, bidirectional_iterator_tag>
+ && common_range>>)
  return bidirectional_iterator_tag{};
else if constexpr (derived_from<_OuterCat, forward_iterator_tag>
   && derived_from<_InnerCat, forward_iterator_tag>)
@@ -2575,7 +2576,8 @@ namespace views::__adaptor
  {
if constexpr (_S_ref_is_glvalue
  && bidirectional_range<_Base>
- && bidirectional_range>)
+ && bidirectional_range>
+ && common_range>)
  return bidirectional_iterator_tag{};
else if constexpr (_S_ref_is_glvalue
   && forward_range<_Base>
-- 
2.33.1.711.g9d530dc002



[PATCH] libstdc++: Implement LWG 3595 changes to common_iterator

2021-10-20 Thread Patrick Palka via Gcc-patches
Tested on x86_64-pc-linux-gnu, does this look OK for trunk and branches?

libstdc++-v3/ChangeLog:

* include/bits/stl_iterator.h (common_iterator::__arrow_proxy):
Make fully constexpr.
(common_iterator::__postfix_proxy): Likewise.
---
 libstdc++-v3/include/bits/stl_iterator.h | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libstdc++-v3/include/bits/stl_iterator.h 
b/libstdc++-v3/include/bits/stl_iterator.h
index 8afd6756613..da3c8d5bd97 100644
--- a/libstdc++-v3/include/bits/stl_iterator.h
+++ b/libstdc++-v3/include/bits/stl_iterator.h
@@ -1821,13 +1821,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 {
   iter_value_t<_It> _M_keep;
 
-  __arrow_proxy(iter_reference_t<_It>&& __x)
+  constexpr __arrow_proxy(iter_reference_t<_It>&& __x)
   : _M_keep(std::move(__x)) { }
 
   friend class common_iterator;
 
 public:
-  const iter_value_t<_It>*
+  constexpr const iter_value_t<_It>*
   operator->() const noexcept
   { return std::__addressof(_M_keep); }
 };
@@ -1836,13 +1836,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 {
   iter_value_t<_It> _M_keep;
 
-  __postfix_proxy(iter_reference_t<_It>&& __x)
+  constexpr __postfix_proxy(iter_reference_t<_It>&& __x)
   : _M_keep(std::forward>(__x)) { }
 
   friend class common_iterator;
 
 public:
-  const iter_value_t<_It>&
+  constexpr const iter_value_t<_It>&
   operator*() const noexcept
   { return _M_keep; }
 };
-- 
2.33.1.711.g9d530dc002



Re: [PATCH 6/8] tree-dynamic-object-size: Handle function parameters

2021-10-20 Thread Siddhesh Poyarekar

On 10/20/21 22:26, Martin Sebor wrote:

On 10/7/21 4:14 PM, Siddhesh Poyarekar wrote:

Handle either static sizes in function parameters or hints provided by
__attribute__ ((access (...))) to compute sizes for objects.


It's been my hope to eventually teach __builtin_object_size about
attribute access but implementing it in the new built-in might be
preferable.  Glad to see you noticed it and took advantage of it!

Does this include handling "VLA function parameters" as in

   void f (int n, char d[n]);   // (or char d[static n])

I don't see tests for it in this patch but since internally, GCC
describes VLA (and array) function arguments using attribute access
handling it should automatically give us VLA (and array) support
as well unless we disable it, either intentionally or by accident.
Either way, I would recommend adding tests for VLA parameters.


Not in this patchset, but I did notice it and put it in my list of 
things to add on once this makes it in.


Thanks,
Siddhesh


Re: [PATCH] doc: Fix documentation around 'asm' keyword in C++

2021-10-20 Thread Jeff Law via Gcc-patches




On 10/20/2021 6:28 AM, Jonathan Wakely via Gcc-patches wrote:

The documentation on asm statements suggests asm is always a GNU
extension, but it's been part of ISO C++ since the first standard.

The documentation of -fno-asm is wrong for C++ as it states that it only
affects typeof, but actually it affects typeof and asm (despite asm
being part of ISO C++).

gcc/ChangeLog:

* doc/extend.texi (Basic Asm): Clarify that asm is not an
extension in C++.
* doc/invoke.texi (-fno-asm): Fix description for C++.

OK for trunk?

OK
jeff



Re: [RFC] Remove VRP threader passes in exchange for better threading pre-VRP.

2021-10-20 Thread Jeff Law via Gcc-patches




On 10/20/2021 3:27 AM, Aldy Hernandez wrote:

On Wed, Oct 20, 2021 at 1:00 AM Jeff Law  wrote:



On 10/18/2021 8:03 AM, Aldy Hernandez wrote:


On 10/18/21 3:41 PM, Aldy Hernandez wrote:


I've been experimenting with reducing the total number of threading
passes, and I'd like to see if there's consensus/stomach for altering
the pipeline.  Note, that the goal is to remove forward threader
clients,
not the other way around.  So, we should prefer to remove a VRP threader
instance over a *.thread one immediately before VRP.

After some playing, it looks like if we enable fully-resolving mode in
the *.thread passes immediately preceding VRP, we can remove the VRP
threading passes altogether, thus removing 2 threading passes (and
forward threading passes at that!).

It occurs to me that we could also remove the threading before VRP
passes, and enable a fully-resolving backward threader after VRP. I
haven't played with this scenario, but it should be just as good.
That being said, I don't know the intricacies of why we had both pre
and post VRP threading passes, and if one is ideally better than the
other.

The only post-VRP threading pass that (in my mind) makes sense is the
one sitting between VRP and DOM and it should replace the DOM based
threader.

Yes, that's the goal, but it won't happen on this release because of
floats.  The DOM threader uses the const/avails machinery to thread
conditionals involving floats, something the path solver can't do
because it depends on gori/ranger.  Adding floats to ranger is
probably our #1 task for the next cycle.

Now before Andrew gets clever, the relation oracle is technically type
agnostic, so it could theoretically be possible to use it in the DOM
threader and replace all the const/avails stuff.  But I'd like to go
on vacation at some point ;-).
Correct.  I just wanted to make it clear that as the backwards threader 
improves I see less and less of a need to run as many threader passes.


For VRP1 -> DOM2, I could see removing the threading path from DOM2 and 
having a backwards threader between VRP1 and DOM2.  I don't see 
significant value in having a threading pass of any sort after VRP2 as 
the vast majority of things are handled by then.


Jeff


Re: (!HELP NEEDED) Where is the doc for the format strings in gcc (for example, %q+D, ...)

2021-10-20 Thread Marek Polacek via Gcc-patches
On Wed, Oct 20, 2021 at 03:49:09PM +, Qing Zhao via Gcc-patches wrote:
> Hi,
> 
> In GCC, there are many utility routines for reporting error, warning, or 
> information, for example:
> 
> warning (0, "weak declaration of %q+D not supported", decl);
> warning_at (stmtloc, OPT_Wmaybe_uninitialized,  "%qE may be used 
> uninitialized", ptr));
> inform (loc, "in a call to %qT declared with " "attribute %<%s%>", fntype, 
> access_str);
> error ("%qD is unavailable: %s", node, (const char *) msg);
> 
> There are format-strings inside them, “%q+D”, “%qE”, “%qT”, “%qD”, etc, where 
> can I find a doc for the details of
> These format-strings? Or which source files I should read to understand the 
> details?

You can take a look at cp/error.c:

/* Called from output_format -- during diagnostic message processing --
   to handle C++ specific format specifier with the following meanings:
   %A   function argument-list.
   %C   tree code.
   %D   declaration.
   %E   expression.
   %F   function declaration.
   %H   type difference (from).
   %I   type difference (to).
   %L   language as used in extern "lang".
   %O   binary operator.
   %P   function parameter whose position is indicated by an integer.
   %Q   assignment operator.
   %S   substitution (template + args)
   %T   type.
   %V   cv-qualifier.
   %X   exception-specification.  */
static bool 
cp_printer (pretty_printer *pp, text_info *text, const char *spec,

or c/c-objc-common.c:

/* Called during diagnostic message formatting process to print a
   source-level entity onto BUFFER.  The meaning of the format specifiers
   is as follows:
   %D: a general decl,
   %E: an identifier or expression,
   %F: a function declaration,
   %T: a type.
   %V: a list of type qualifiers from a tree.
   %v: an explicit list of type qualifiers
   %#v: an explicit list of type qualifiers of a function type.

   Please notice when called, the `%' part was already skipped by the
   diagnostic machinery.  */
static bool
c_tree_printer (pretty_printer *pp, text_info *text, const char *spec,

Marek



Re: [PATCH] Attempt to resolve all incoming paths to a PHI.

2021-10-20 Thread Jeff Law via Gcc-patches




On 10/20/2021 6:37 AM, Aldy Hernandez wrote:

The code that threads incoming paths to a PHI is duplicating what we
do generically in find_paths_to_names.  This shortcoming is actually
one of the reasons we aren't threading all possible paths into a PHI.
For example, we give up after finding one threadable path, but some
PHIs have multiple threadable paths:

   // x_5 = PHI <10(4), 20(5), ...>
   // if (x_5 > 5)

Addressing this not only fixes the oversight, but simplifies the
PHI handling code, since we can consider the PHI fully resolved upon
return.

Interestingly, for ssa-thread-12.c the main thread everything was
hinging on was unreachable.  With this patch, we call
maybe_register_path() earlier.  In doing so, the solver realizes
that any path starting with 4->8 is unreachable and can be avoided.
This caused the cascade of threadable paths that depended on this
to no longer happen.  Since threadable paths in thread[34] was the only
thing this test was testing, there's no longer anything to test.  Neat!

Tested on x86-64 Linux.

OK for trunk?

gcc/ChangeLog:

* tree-ssa-threadbackward.c (back_threader::resolve_phi):
Attempt to resolve all incoming paths to a PHI.
(back_threader::resolve_def): Always return true for PHIs.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/pr21090.c: Adjust for threading.
* gcc.dg/tree-ssa/ssa-thread-12.c: Removed.

OK
jeff



Re: [PATCH] Attempt to resolve all incoming paths to a PHI.

2021-10-20 Thread Aldy Hernandez via Gcc-patches
Thanks.  I'm going to wait until/if you approve the previous patch in
the series before I commit:

[PATCH] Try to resolve paths in threader without looking further back.

..because otherwise I'll have to test again and twiddle tests in a
different order.

Aldy

On Wed, Oct 20, 2021 at 8:04 PM Jeff Law  wrote:
>
>
>
> On 10/20/2021 6:37 AM, Aldy Hernandez wrote:
> > The code that threads incoming paths to a PHI is duplicating what we
> > do generically in find_paths_to_names.  This shortcoming is actually
> > one of the reasons we aren't threading all possible paths into a PHI.
> > For example, we give up after finding one threadable path, but some
> > PHIs have multiple threadable paths:
> >
> >// x_5 = PHI <10(4), 20(5), ...>
> >// if (x_5 > 5)
> >
> > Addressing this not only fixes the oversight, but simplifies the
> > PHI handling code, since we can consider the PHI fully resolved upon
> > return.
> >
> > Interestingly, for ssa-thread-12.c the main thread everything was
> > hinging on was unreachable.  With this patch, we call
> > maybe_register_path() earlier.  In doing so, the solver realizes
> > that any path starting with 4->8 is unreachable and can be avoided.
> > This caused the cascade of threadable paths that depended on this
> > to no longer happen.  Since threadable paths in thread[34] was the only
> > thing this test was testing, there's no longer anything to test.  Neat!
> >
> > Tested on x86-64 Linux.
> >
> > OK for trunk?
> >
> > gcc/ChangeLog:
> >
> >   * tree-ssa-threadbackward.c (back_threader::resolve_phi):
> >   Attempt to resolve all incoming paths to a PHI.
> >   (back_threader::resolve_def): Always return true for PHIs.
> >
> > gcc/testsuite/ChangeLog:
> >
> >   * gcc.dg/tree-ssa/pr21090.c: Adjust for threading.
> >   * gcc.dg/tree-ssa/ssa-thread-12.c: Removed.
> OK
> jeff
>



[PATCH] libstdc++: Implement LWG 3590-3592 changes to split_view/lazy_split_view

2021-10-20 Thread Patrick Palka via Gcc-patches
Tested on x86_64-pc-linux-gnu, does this look OK for trunk, and 11 after
a while?

libstdc++-v3/ChangeLog:

* include/std/ranges (lazy_split_view::base): Add forward_range
constraint as per LWG 3591.
(lazy_split_view::begin, lazy_split_view::end): Also check
simpleness of _Pattern as per LWG 3592.
(split_view::base): Relax copyable constraint as per LWG 3590.
---
 libstdc++-v3/include/std/ranges | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges
index c1519c7dbd5..30ba0606869 100644
--- a/libstdc++-v3/include/std/ranges
+++ b/libstdc++-v3/include/std/ranges
@@ -3137,7 +3137,7 @@ namespace views::__adaptor
  { return _M_i_current(); }
 
  constexpr iterator_t<_Base>
- base() &&
+ base() && requires forward_range<_Vp>
  { return std::move(_M_i_current()); }
 
  constexpr decltype(auto)
@@ -3229,8 +3229,11 @@ namespace views::__adaptor
   begin()
   {
if constexpr (forward_range<_Vp>)
- return _OuterIter<__detail::__simple_view<_Vp>>{
- this, ranges::begin(_M_base)};
+ {
+   constexpr bool __simple
+ = __detail::__simple_view<_Vp> && 
__detail::__simple_view<_Pattern>;
+   return _OuterIter<__simple>{this, ranges::begin(_M_base)};
+ }
else
  {
_M_current = ranges::begin(_M_base);
@@ -3247,8 +3250,9 @@ namespace views::__adaptor
   constexpr auto
   end() requires forward_range<_Vp> && common_range<_Vp>
   {
-   return _OuterIter<__detail::__simple_view<_Vp>>{
-   this, ranges::end(_M_base)};
+   constexpr bool __simple
+ = __detail::__simple_view<_Vp> && __detail::__simple_view<_Pattern>;
+   return _OuterIter<__simple>{this, ranges::end(_M_base)};
   }
 
   constexpr auto
@@ -3339,7 +3343,7 @@ namespace views::__adaptor
 { }
 
 constexpr _Vp
-base() const& requires copyable<_Vp>
+base() const& requires copy_constructible<_Vp>
 { return _M_base; }
 
 constexpr _Vp
-- 
2.33.1.711.g9d530dc002



Re: [PATCH] X86: Add an option -muse-unaligned-vector-move

2021-10-20 Thread H.J. Lu via Gcc-patches
On Wed, Oct 20, 2021 at 9:58 AM Richard Biener
 wrote:
>
> On October 20, 2021 3:19:28 PM GMT+02:00, "H.J. Lu"  
> wrote:
> >On Wed, Oct 20, 2021 at 4:18 AM Richard Biener
> > wrote:
> >>
> >> On Wed, Oct 20, 2021 at 12:40 PM Xu Dianhong  wrote:
> >> >
> >> > Many thanks for your explanation. I got the meaning of operands.
> >> > The "addpd b(%rip), %xmm0" instruction needs "b(%rip)" aligned otherwise 
> >> > it will raise a "Real-Address Mode Exceptions".
> >> > I haven't considered this situation  "b(%rip)" has an address dependence 
> >> > of "a(%rip)" before. I think this situation could be resolved on the 
> >> > assembler side except for this dummy code like "movapd 
> >> > 0x200b37(%rip),%xmm1, ... addpd  0x200b37(%rip),%xmm0 ".
> >>
> >> Of course the compiler will only emit instructions which have the
> >> constraint of aligned memory
> >> when the memory is known to be aligned.  That's why I wonder why you
> >> would need such
> >> option.  "Real-Address Mode Exceptions" may point to the issue, but I
> >> wonder what's different
> >> in real mode vs. protected mode - even with segmentation the alignment
> >> of objects should
> >> prevail unless you play linker"tricks" that make global objects have
> >> different alignment - but
> >> then it's better to adjust the respective hooks to not falsely claim
> >> such alignment.  Consider
> >> for example
> >>
> >>if ((uintptr_t)&a & 0x7)
> >>  foo();
> >>   else
> >>  bar();
> >>
> >> GCC will optimize the branch statically to always call foo if 'a'
> >> appears to be aligned,
> >> even if you later try to "override" this with an option.  Alignment is
> >> not only about
> >> moves, it's also about knowledge about low bits in addresses and about
> >> alias analysis where alignment constrains how two objects can overlap.
> >>
> >> So - do not lie to the compiler!  A late "workaround" avoiding aligned
> >> SSE moves isn't a proper fix.
> >>
> >
> >The motivations are
> >
> >1.  AVX non-load/store ops work on unaligned memory.   Unaligned
> >load/store on aligned memory is as fast as aligned load/store on Intel
> >AVX machines.   The new switch makes load/store consistent with
> >other AVX ops.
> >2. We don't properly align the stack for AVX on Windows.  This can
> >be used as a workaround for -mavx on Windows.
>
> But this, with lying that the stack is aligned, causes all of the above 
> mentioned issues and thus needs to be fixed by either properly aligning the 
> stack or not lying to the compiler that we do.
>
> >
> >We can change TARGET_USE_UNALIGNED_VECTOR_MOVE
> >to require AVX.
>
> But such workaround does not make any sense since it does not fix the 
> fundamental underlying problem.
>

There is a long standing desire to remove alignment checking (#AC(0)).
For integer operations, alignment checking is disabled in hardware.
For AVX ops, alignment checking is disabled in hardware for non-load/store
instructions.  But we can't disable alignment checking in hardware for
aligned load/store instructions.  -muse-unaligned-vector-move implements
disabling alignment checking for all AVX ops.

-- 
H.J.


Re: Old installation docs

2021-10-20 Thread Jonathan Wakely via Gcc-patches
On Wed, 20 Oct 2021 at 18:10, Martin Liška wrote:

> On 10/20/21 18:59, Jonathan Wakely via Gcc wrote:
> > On Wed, 20 Oct 2021 at 17:40, Joseph Myers wrote:
> >
> >> On Wed, 20 Oct 2021, Jonathan Wakely via Gcc wrote:
> >>
> >>> https://gcc.gnu.org/install/ says:
> >>>
> >>> "There are also some old installation instructions
> >>> , which are mostly obsolete but
> >> still
> >>> contain some information which has not yet been merged into the main
> part
> >>> of this manual."
> >>
> >> Those should have been removed in GCC commit
> >> 431d26e1dd18c1146d3d4dcd3b45a3b04f7f7d59, it seems that forgot to remove
> >> the link in the HTML version.
> >>
> >>
> > Aha, thanks. I will submit a patch to remove the link.
> >
>
> Please do so, I really forgot about it.
>

Done by the attached patch, pushed as r12-4580.
commit 885f9b4ad59a1c37742b68505edc80c7f419d9a4
Author: Jonathan Wakely 
Date:   Wed Oct 20 19:39:15 2021

doc: Remove broken link to old.html docs

The target of this link was removed in r12-1061.

gcc/ChangeLog:

* doc/install.texi: Remove link to old.html

diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index edcaae3f55a..7c775965964 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -198,12 +198,6 @@ remove that directory when you do not need that specific 
version of GCC
 any longer, and, if shared libraries are installed there as well, no
 more binaries exist that use them.
 
-@ifhtml
-There are also some @uref{old.html,,old installation instructions},
-which are mostly obsolete but still contain some information which has
-not yet been merged into the main part of this manual.
-@end ifhtml
-
 @html
 
 


Re: [PATCH] libstdc++: Implement LWG 3590-3592 changes to split_view/lazy_split_view

2021-10-20 Thread Jonathan Wakely via Gcc-patches
On Wed, 20 Oct 2021 at 19:17, Patrick Palka wrote:
>
> Tested on x86_64-pc-linux-gnu, does this look OK for trunk, and 11 after
> a while?

Yes for both, thanks.



Re: [PATCH] doc: Fix documentation around 'asm' keyword in C++

2021-10-20 Thread Jonathan Wakely via Gcc-patches
On Wed, 20 Oct 2021 at 18:44, Jeff Law wrote:
> On 10/20/2021 6:28 AM, Jonathan Wakely via Gcc-patches wrote:
> > The documentation on asm statements suggests asm is always a GNU
> > extension, but it's been part of ISO C++ since the first standard.
> >
> > The documentation of -fno-asm is wrong for C++ as it states that it only
> > affects typeof, but actually it affects typeof and asm (despite asm
> > being part of ISO C++).
> >
> > gcc/ChangeLog:
> >
> >   * doc/extend.texi (Basic Asm): Clarify that asm is not an
> >   extension in C++.
> >   * doc/invoke.texi (-fno-asm): Fix description for C++.
> >
> > OK for trunk?
> OK

On IRC Jakub and Segher suggested slightly different wording for the
Basic Asm section, to clarify what I was trying to say about -std=cNN
options. I pushed something based on Segher's suggestion ("-ansi and
the -std options that select C dialects without GNU extensions"). The
final version is attached to this mail, I hope it was OK to tweak it
without getting approval again.
commit 154c6d430ee173904237de64d5aae11565201318
Author: Jonathan Wakely 
Date:   Wed Oct 20 19:41:49 2021

doc: Fix documentation around 'asm' keyword in C++

The documentation on asm statements suggests asm is always a GNU
extension, but it's been part of ISO C++ since the first standard.

The documentation of -fno-asm is wrong for C++ as it states that it only
affects typeof, but actually it affects typeof and asm (despite asm
being part of ISO C++).

gcc/ChangeLog:

* doc/extend.texi (Basic Asm): Clarify that asm is not an
extension in C++.
* doc/invoke.texi (-fno-asm): Fix description for C++.

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 3c942d81c32..eee4c6737bb 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -9728,10 +9728,12 @@ A basic @code{asm} statement has the following syntax:
 asm @var{asm-qualifiers} ( @var{AssemblerInstructions} )
 @end example
 
-The @code{asm} keyword is a GNU extension.
-When writing code that can be compiled with @option{-ansi} and the
-various @option{-std} options, use @code{__asm__} instead of 
-@code{asm} (@pxref{Alternate Keywords}).
+For the C language, the @code{asm} keyword is a GNU extension.
+When writing C code that can be compiled with @option{-ansi} and the
+@option{-std} options that select C dialects without GNU extensions, use
+@code{__asm__} instead of @code{asm} (@pxref{Alternate Keywords}).  For
+the C++ language, @code{asm} is a standard keyword, but @code{__asm__}
+can be used for code compiled with @option{-fno-asm}.
 
 @subsubheading Qualifiers
 @table @code
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index c93d822431f..6d1e328571a 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -2485,14 +2485,14 @@ supported for C as this construct is allowed by C++.
 Do not recognize @code{asm}, @code{inline} or @code{typeof} as a
 keyword, so that code can use these words as identifiers.  You can use
 the keywords @code{__asm__}, @code{__inline__} and @code{__typeof__}
-instead.  @option{-ansi} implies @option{-fno-asm}.
+instead.  In C, @option{-ansi} implies @option{-fno-asm}.
 
-In C++, this switch only affects the @code{typeof} keyword, since
-@code{asm} and @code{inline} are standard keywords.  You may want to
-use the @option{-fno-gnu-keywords} flag instead, which has the same
-effect.  In C99 mode (@option{-std=c99} or @option{-std=gnu99}), this
-switch only affects the @code{asm} and @code{typeof} keywords, since
-@code{inline} is a standard keyword in ISO C99.
+In C++, @code{inline} is a standard keyword and is not affected by
+this switch.  You may want to use the @option{-fno-gnu-keywords} flag
+instead, which disables @code{typeof} but not @code{asm} and
+@code{inline}.  In C99 mode (@option{-std=c99} or @option{-std=gnu99}),
+this switch only affects the @code{asm} and @code{typeof} keywords,
+since @code{inline} is a standard keyword in ISO C99.
 
 @item -fno-builtin
 @itemx -fno-builtin-@var{function}


Re: [PATCH] libstdc++: Implement LWG 3481 change to ranges::viewable_range

2021-10-20 Thread Jonathan Wakely via Gcc-patches
On Wed, 20 Oct 2021 at 18:01, Patrick Palka wrote:
>
> Tested on x86_64-pc-linux-gnu, does this look OK for trunk, and branches
> after a while?

Yes for all.

>
> libstdc++-v3/ChangeLog:
>
> * include/bits/ranges_base.h (viewable_range): Adjust as per
> LWG 3481.
> * testsuite/std/ranges/adaptors/all.cc (test07): New test.
> ---
>  libstdc++-v3/include/bits/ranges_base.h   |  3 ++-
>  libstdc++-v3/testsuite/std/ranges/adaptors/all.cc | 15 +++
>  2 files changed, 17 insertions(+), 1 deletion(-)
>
> diff --git a/libstdc++-v3/include/bits/ranges_base.h 
> b/libstdc++-v3/include/bits/ranges_base.h
> index 7801b2fd023..43b0b9f7bf3 100644
> --- a/libstdc++-v3/include/bits/ranges_base.h
> +++ b/libstdc++-v3/include/bits/ranges_base.h
> @@ -688,7 +688,8 @@ namespace ranges
>/// A range which can be safely converted to a view.
>template
>  concept viewable_range = range<_Tp>
> -  && (borrowed_range<_Tp> || view>);
> +  && ((view> && 
> constructible_from, _Tp>)
> + || (!view> && borrowed_range<_Tp>));
>
>// [range.iter.ops] range iterator operations
>
> diff --git a/libstdc++-v3/testsuite/std/ranges/adaptors/all.cc 
> b/libstdc++-v3/testsuite/std/ranges/adaptors/all.cc
> index 9a6a31e6cb4..ce1384c2890 100644
> --- a/libstdc++-v3/testsuite/std/ranges/adaptors/all.cc
> +++ b/libstdc++-v3/testsuite/std/ranges/adaptors/all.cc
> @@ -159,6 +159,20 @@ test06()
>static_assert(!noexcept(views::all(BorrowedRange(x;
>  }
>
> +void
> +test07()
> +{
> +  // LWG 3481
> +  struct view_t : ranges::empty_view { // move-only view
> +view_t(const view_t&) = delete;
> +view_t(view_t&&) = default;
> +view_t& operator=(const view_t&) = delete;
> +view_t& operator=(view_t&&) = default;
> +  };
> +  static_assert(std::movable && !std::copyable);
> +  static_assert(!ranges::viewable_range);
> +}
> +
>  int
>  main()
>  {
> @@ -168,4 +182,5 @@ main()
>static_assert(test04());
>test05();
>test06();
> +  test07();
>  }
> --
> 2.33.1.711.g9d530dc002
>



Re: [PATCH] libstdc++: Implement LWG 3535 changes to ranges::join_view

2021-10-20 Thread Jonathan Wakely via Gcc-patches
On Wed, 20 Oct 2021 at 18:03, Patrick Palka via Libstdc++
 wrote:
>
> Tested on x86_64-pc-linux-gnu, does this look OK for trunk, and branches
> after a while?

Yes for all.


>
> libstdc++-v3/ChangeLog:
>
> * include/std/ranges (join_view::__iter_cat::_S_iter_cat): Adjust
> criteria for returning bidirectional_iterator_tag as per LWG3535.
> (join_view::_Iterator::_S_iter_concept): Likewise.
> ---
>  libstdc++-v3/include/std/ranges | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges
> index 273699aa790..c1519c7dbd5 100644
> --- a/libstdc++-v3/include/std/ranges
> +++ b/libstdc++-v3/include/std/ranges
> @@ -2523,7 +2523,8 @@ namespace views::__adaptor
> using _OuterCat = typename 
> iterator_traits<_Outer_iter>::iterator_category;
> using _InnerCat = typename 
> iterator_traits<_Inner_iter>::iterator_category;
> if constexpr (derived_from<_OuterCat, bidirectional_iterator_tag>
> - && derived_from<_InnerCat, 
> bidirectional_iterator_tag>)
> + && derived_from<_InnerCat, 
> bidirectional_iterator_tag>
> + && common_range>>)
>   return bidirectional_iterator_tag{};
> else if constexpr (derived_from<_OuterCat, forward_iterator_tag>
>&& derived_from<_InnerCat, 
> forward_iterator_tag>)
> @@ -2575,7 +2576,8 @@ namespace views::__adaptor
>   {
> if constexpr (_S_ref_is_glvalue
>   && bidirectional_range<_Base>
> - && bidirectional_range>)
> + && bidirectional_range>
> + && common_range>)
>   return bidirectional_iterator_tag{};
> else if constexpr (_S_ref_is_glvalue
>&& forward_range<_Base>
> --
> 2.33.1.711.g9d530dc002
>



Re: [PATCH] libstdc++: Implement LWG 3595 changes to common_iterator

2021-10-20 Thread Jonathan Wakely via Gcc-patches
On Wed, 20 Oct 2021 at 18:04, Patrick Palka via Libstdc++
 wrote:
>
> Tested on x86_64-pc-linux-gnu, does this look OK for trunk and branches?

I thought I'd already done this one, maybe I have it in a local Git stash.

OK for all branches, thanks.

>
> libstdc++-v3/ChangeLog:
>
> * include/bits/stl_iterator.h (common_iterator::__arrow_proxy):
> Make fully constexpr.
> (common_iterator::__postfix_proxy): Likewise.
> ---
>  libstdc++-v3/include/bits/stl_iterator.h | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/libstdc++-v3/include/bits/stl_iterator.h 
> b/libstdc++-v3/include/bits/stl_iterator.h
> index 8afd6756613..da3c8d5bd97 100644
> --- a/libstdc++-v3/include/bits/stl_iterator.h
> +++ b/libstdc++-v3/include/bits/stl_iterator.h
> @@ -1821,13 +1821,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  {
>iter_value_t<_It> _M_keep;
>
> -  __arrow_proxy(iter_reference_t<_It>&& __x)
> +  constexpr __arrow_proxy(iter_reference_t<_It>&& __x)
>: _M_keep(std::move(__x)) { }
>
>friend class common_iterator;
>
>  public:
> -  const iter_value_t<_It>*
> +  constexpr const iter_value_t<_It>*
>operator->() const noexcept
>{ return std::__addressof(_M_keep); }
>  };
> @@ -1836,13 +1836,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  {
>iter_value_t<_It> _M_keep;
>
> -  __postfix_proxy(iter_reference_t<_It>&& __x)
> +  constexpr __postfix_proxy(iter_reference_t<_It>&& __x)
>: _M_keep(std::forward>(__x)) { }
>
>friend class common_iterator;
>
>  public:
> -  const iter_value_t<_It>&
> +  constexpr const iter_value_t<_It>&
>operator*() const noexcept
>{ return _M_keep; }
>  };
> --
> 2.33.1.711.g9d530dc002
>



Re: [PATCH][WIP] Add install-dvi Makefile targets

2021-10-20 Thread Eric Gallager via Gcc-patches
On Tue, Oct 19, 2021 at 1:41 AM Thomas Koenig  wrote:
>
> Hi Eric,
>
> > Hi, I have updated this patch and tested it with more languages now; I
> > can now confirm that it works with ada, d, and fortran now. The only
> > languages that remain untested now are go (since I'm building on
> > darwin and go doesn't build on darwin anyways, as per bug 46986) and
> > jit (which I ran into a bug about that I brought up on IRC, and will
> > probably need to file on bugzilla). OK to install?
>
> Fortran parts look good.
>
> Best regards
>
> Thomas

OK, thanks... so... at this point, who do I still need approval from
for the rest of it, then? Should I be cc-ing the build system
maintainers? The maintainers for all the rest of the subdirectories
I'm touching? Global reviewers? Someone else?
Thanks,
Eric Gallager


gcc-patches@gcc.gnu.org

2021-10-20 Thread Jonathan Wakely via Gcc-patches
On Wed, 20 Oct 2021 at 09:42, Jonathan Wakely wrote:
>
> On Wed, 20 Oct 2021 at 06:48, Tim Song wrote:
> >
> > On Tue, Oct 19, 2021 at 9:05 AM Jonathan Wakely via Gcc-patches
> >  wrote:
> > >
> > > +constexpr bool
> > > +test_copy_elision()
> > > +{
> > > +  return true;
> > > +}
> > > +
> > > +static_assert( test_copy_elision() );
> > > +
> >
> > This isn't much of a test :)
>
> The ultimate copy elision. The copies even get removed from the source code.
>
> Thanks for spotting it. I meant to check we're constructing directly
> into the union member without materializing the temporary early. I'll
> add that today.

Fixed by the attached patch. Tested x86_64-linux, pushed to trunk.

Thanks again!
commit 4ba4b053151a20262d4b61eb4501aa1c48337abb
Author: Jonathan Wakely 
Date:   Wed Oct 20 20:12:28 2021

libstdc++: Add missing test for std::optional::transform(F&&)

The test_copy_elision() function was supposed to ensure that the result
is constructed directly in the std::optional, without early temporary
materialization. But I forgot to write the test.

libstdc++-v3/ChangeLog:

* testsuite/20_util/optional/monadic/transform.cc: Check that
an rvalue result is not materialized too soon.

diff --git a/libstdc++-v3/testsuite/20_util/optional/monadic/transform.cc 
b/libstdc++-v3/testsuite/20_util/optional/monadic/transform.cc
index d01ccb2e0f2..13977b8ba8d 100644
--- a/libstdc++-v3/testsuite/20_util/optional/monadic/transform.cc
+++ b/libstdc++-v3/testsuite/20_util/optional/monadic/transform.cc
@@ -110,6 +110,23 @@ static_assert( test_forwarding() );
 constexpr bool
 test_copy_elision()
 {
+  struct immovable
+  {
+constexpr immovable(int p) : power_level(p) { }
+immovable(immovable&&) = delete;
+
+int power_level;
+  };
+
+  struct Force
+  {
+constexpr immovable operator()(int i) const { return {i+1}; }
+  };
+
+  std::optional irresistible(9000);
+  std::optional object = irresistible.transform(Force{});
+  VERIFY( object->power_level > 9000 );
+
   return true;
 }
 


Re: (!HELP NEEDED) Where is the doc for the format strings in gcc (for example, %q+D, ...)

2021-10-20 Thread Qing Zhao via Gcc-patches
Hi, Marek,

Thanks a lot for the information. 

Really helpful.

Qing

> On Oct 20, 2021, at 12:57 PM, Marek Polacek  wrote:
> 
> On Wed, Oct 20, 2021 at 03:49:09PM +, Qing Zhao via Gcc-patches wrote:
>> Hi,
>> 
>> In GCC, there are many utility routines for reporting error, warning, or 
>> information, for example:
>> 
>> warning (0, "weak declaration of %q+D not supported", decl);
>> warning_at (stmtloc, OPT_Wmaybe_uninitialized,  "%qE may be used 
>> uninitialized", ptr));
>> inform (loc, "in a call to %qT declared with " "attribute %<%s%>", fntype, 
>> access_str);
>> error ("%qD is unavailable: %s", node, (const char *) msg);
>> 
>> There are format-strings inside them, “%q+D”, “%qE”, “%qT”, “%qD”, etc, 
>> where can I find a doc for the details of
>> These format-strings? Or which source files I should read to understand the 
>> details?
> 
> You can take a look at cp/error.c:
> 
> /* Called from output_format -- during diagnostic message processing --
>   to handle C++ specific format specifier with the following meanings:
>   %A   function argument-list.
>   %C   tree code.
>   %D   declaration.
>   %E   expression.
>   %F   function declaration.
>   %H   type difference (from).
>   %I   type difference (to).
>   %L   language as used in extern "lang".
>   %O   binary operator.
>   %P   function parameter whose position is indicated by an integer.
>   %Q   assignment operator.
>   %S   substitution (template + args)
>   %T   type.
>   %V   cv-qualifier.
>   %X   exception-specification.  */
> static bool 
> cp_printer (pretty_printer *pp, text_info *text, const char *spec,
> 
> or c/c-objc-common.c:
> 
> /* Called during diagnostic message formatting process to print a
>   source-level entity onto BUFFER.  The meaning of the format specifiers
>   is as follows:
>   %D: a general decl,
>   %E: an identifier or expression,
>   %F: a function declaration,
>   %T: a type.
>   %V: a list of type qualifiers from a tree.
>   %v: an explicit list of type qualifiers
>   %#v: an explicit list of type qualifiers of a function type.
> 
>   Please notice when called, the `%' part was already skipped by the
>   diagnostic machinery.  */
> static bool
> c_tree_printer (pretty_printer *pp, text_info *text, const char *spec,
> 
> Marek
> 



[Ada] Fix type conversion handling in validity checks

2021-10-20 Thread Pierre-Marie de Rodat via Gcc-patches
In case of a checked type conversion, correctly update Typ to match the
expression being validated and call Analyze_And_Resolve on the modified
expression.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* checks.adb (Insert_Valid_Check): in case of checked type
conversion, update Typ to match Exp's type and add call to
Analyze_And_Resolve.

diff --git a/gcc/ada/checks.adb b/gcc/ada/checks.adb
--- a/gcc/ada/checks.adb
+++ b/gcc/ada/checks.adb
@@ -8077,7 +8077,7 @@ package body Checks is
   Is_High_Bound : Boolean   := False)
is
   Loc : constant Source_Ptr := Sloc (Expr);
-  Typ : constant Entity_Id  := Etype (Expr);
+  Typ : Entity_Id   := Etype (Expr);
   Exp : Node_Id;
 
begin
@@ -8137,6 +8137,7 @@ package body Checks is
   while Nkind (Exp) = N_Type_Conversion loop
  Exp := Expression (Exp);
   end loop;
+  Typ := Etype (Exp);
 
   --  Do not generate a check for a variable which already validates the
   --  value of an assignable object.
@@ -8217,6 +8218,14 @@ package body Checks is
Set_Do_Range_Check (Validated_Object (Var_Id), False);
 end if;
 
+--  In case of a type conversion, an expansion of the expr may be
+--  needed (eg. fixed-point as actual).
+
+if Exp /= Expr then
+   pragma Assert (Nkind (Expr) = N_Type_Conversion);
+   Analyze_And_Resolve (Expr);
+end if;
+
 PV := New_Occurrence_Of (Var_Id, Loc);
 
  --  Otherwise the expression does not denote a variable. Force its




[Ada] Issue warning on unused quantified expression

2021-10-20 Thread Pierre-Marie de Rodat via Gcc-patches
It is common that a quantified expression takes the form of a
conjunction or disjunction. In such a case, it is expected that all
conjuncts/disjuncts reference the quantified variable. Not doing so can
be either the symptom of an error, or of a non-optimal expression, as
that sub-expression could be extracted from the quantified expression.
This is beneficial for both execution (speed) and for proof
(automation).

Issue a warning in such a case.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch4.adb (Analyze_Quantified_Expression): Issue warning on
conjunct/disjunct sub-expression of the full expression inside a
quantified expression, when it does not reference the quantified
variable.diff --git a/gcc/ada/sem_ch4.adb b/gcc/ada/sem_ch4.adb
--- a/gcc/ada/sem_ch4.adb
+++ b/gcc/ada/sem_ch4.adb
@@ -4299,21 +4299,67 @@ package body Sem_Ch4 is
  Loop_Id := Defining_Identifier (Loop_Parameter_Specification (N));
   end if;
 
-  if Warn_On_Suspicious_Contract
-and then not Referenced (Loop_Id, Cond)
-and then not Is_Internal_Name (Chars (Loop_Id))
-  then
- --  Generating C, this check causes spurious warnings on inlined
- --  postconditions; we can safely disable it because this check
- --  was previously performed when analyzing the internally built
- --  postconditions procedure.
+  declare
+ type Subexpr_Kind is (Full, Conjunct, Disjunct);
 
- if Modify_Tree_For_C and then In_Inlined_Body then
-null;
- else
-Error_Msg_N ("?T?unused variable &", Loop_Id);
+ procedure Check_Subexpr (Expr : Node_Id; Kind : Subexpr_Kind);
+ --  Check that the quantified variable appears in every sub-expression
+ --  of the quantified expression. If Kind is Full, Expr is the full
+ --  expression. If Kind is Conjunct (resp. Disjunct), Expr is a
+ --  conjunct (resp. disjunct) of the full expression.
+
+ ---
+ -- Check_Subexpr --
+ ---
+
+ procedure Check_Subexpr (Expr : Node_Id; Kind : Subexpr_Kind) is
+ begin
+if Nkind (Expr) in N_Op_And | N_And_Then
+  and then Kind /= Disjunct
+then
+   Check_Subexpr (Left_Opnd (Expr), Conjunct);
+   Check_Subexpr (Right_Opnd (Expr), Conjunct);
+
+elsif Nkind (Expr) in N_Op_Or | N_Or_Else
+  and then Kind /= Conjunct
+then
+   Check_Subexpr (Left_Opnd (Expr), Disjunct);
+   Check_Subexpr (Right_Opnd (Expr), Disjunct);
+
+elsif Kind /= Full
+  and then not Referenced (Loop_Id, Expr)
+then
+   declare
+  Sub : constant String :=
+(if Kind = Conjunct then "conjunct" else "disjunct");
+   begin
+  Error_Msg_NE
+("?T?unused variable & in " & Sub, Expr, Loop_Id);
+  Error_Msg_NE
+("\consider extracting " & Sub & " from quantified "
+ & "expression", Expr, Loop_Id);
+   end;
+end if;
+ end Check_Subexpr;
+
+  begin
+ if Warn_On_Suspicious_Contract
+   and then not Is_Internal_Name (Chars (Loop_Id))
+
+   --  Generating C, this check causes spurious warnings on inlined
+   --  postconditions; we can safely disable it because this check
+   --  was previously performed when analyzing the internally built
+   --  postconditions procedure.
+
+   and then not (Modify_Tree_For_C and In_Inlined_Body)
+ then
+if not Referenced (Loop_Id, Cond) then
+   Error_Msg_N ("?T?unused variable &", Loop_Id);
+else
+   Check_Subexpr (Cond, Kind => Full);
+end if;
  end if;
-  end if;
+  end;
 
   --  Diagnose a possible misuse of the SOME existential quantifier. When
   --  we have a quantified expression of the form:




[Ada] Get rid of Frontend_Exceptions refs

2021-10-20 Thread Pierre-Marie de Rodat via Gcc-patches
Cleanup and remove some unused system specs and references to same.
These are specs that set Frontend_Exceptions, which is no longer
used in GNAT.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* Makefile.rtl: Remove references to system-vxworks-ppc.ads
and system-vxworks-x86.ads.
* libgnat/system-vxworks-ppc.ads: Remove.
* libgnat/system-vxworks-ppc-ravenscar.ads: Likewise.
* libgnat/system-vxworks-x86.ads: Likewise.diff --git a/gcc/ada/Makefile.rtl b/gcc/ada/Makefile.rtl
--- a/gcc/ada/Makefile.rtl
+++ b/gcc/ada/Makefile.rtl
@@ -1166,9 +1166,6 @@ ifeq ($(strip $(filter-out powerpc% wrs vxworks vxworksspe vxworks7% vxworks7spe
   s-vxwext.adshttp://www.gnu.org/licenses/>.  --
---  --
--- GNAT was originally developed  by the GNAT team at  New York University. --
--- Extensive contributions were provided by Ada Core Technologies Inc.  --
---  --
---
-
---  This is a Ravenscar VxWorks version of this package for PowerPC targets
-
-pragma Restrictions (No_Exception_Propagation);
---  Only local exception handling is supported in this profile
-
-pragma Restrictions (No_Exception_Registration);
---  Disable exception name registration. This capability is not used because
---  it is only required by exception stream attributes which are not supported
---  in this run time.
-
-pragma Restrictions (No_Implicit_Dynamic_Code);
---  Pointers to nested subprograms are not allowed in this run time, in order
---  to prevent the compiler from building "trampolines".
-
-pragma Restrictions (No_Finalization);
---  Controlled types are not supported in this run time
-
-pragma Profile (Ravenscar);
---  This is a Ravenscar run time
-
-pragma Discard_Names;
---  Disable explicitly the generation of names associated with entities in
---  order to reduce the amount of storage used. These names are not used anyway
---  (attributes such as 'Image and 'Value are not supported in this run time).
-
-package System is
-   pragma Pure;
-   --  Note that we take advantage of the implementation permission to make
-   --  this unit Pure instead of Preelaborable; see RM 13.7.1(15). In Ada
-   --  2005, this is Pure in any case (AI-362).
-
-   pragma No_Elaboration_Code_All;
-   --  Allow the use of that restriction in units that WITH this unit
-
-   type Name is (SYSTEM_NAME_GNAT);
-   System_Name : constant Name := SYSTEM_NAME_GNAT;
-
-   --  System-Dependent Named Numbers
-
-   Min_Int : constant := -2 ** (Standard'Max_Integer_Size - 1);
-   Max_Int : constant :=  2 ** (Standard'Max_Integer_Size - 1) - 1;
-
-   Max_Binary_Modulus: constant := 2 ** Standard'Max_Integer_Size;
-   Max_Nonbinary_Modulus : constant := 2 ** Integer'Size - 1;
-
-   Max_Base_Digits   : constant := Long_Long_Float'Digits;
-   Max_Digits: constant := Long_Long_Float'Digits;
-
-   Max_Mantissa  : constant := Standard'Max_Integer_Size - 1;
-   Fine_Delta: constant := 2.0 ** (-Max_Mantissa);
-
-   Tick  : constant := 1.0 / 60.0;
-
-   --  Storage-related Declarations
-
-   type Address is private;
-   pragma Preelaborable_Initialization (Address);
-   Null_Address : constant Address;
-
-   Storage_Unit : constant := 8;
-   Word_Size: constant := 32;
-   Memory_Size  : constant := 2 ** 32;
-
-   --  Address comparison
-
-   function "<"  (Left, Right : Address) return Boolean;
-   function "<=" (Left, Right : Address) return Boolean;
-   function ">"  (Left, Right : Address) return Boolean;
-   function ">=" (Left, Right : Address) return Boolean;
-   function "="  (Left, Right : Address) return Boolean;
-
-   pragma Import (Intrinsic, "<");
-   pragma Import (Intrinsic, "<=");
-   pragma Import (Intrinsic, ">");
-   pragma Import (Intrinsic, ">=");
-   pragma Import (Intrinsic, "=");
-
-   --  Other System-Dependent Declarations
-
-   type Bit_Order is (High_Order_First, Low_Order_First);
-   Default_Bit_Order : constant Bit_Order := High_Order_First;
-   pragma Warnings (Off, Default_Bit_Order); -- kill constant condition warning
-
-   --  Priority-related Declarations (RM D.1)
-
-   --  Ada priorities are mapped to VxWorks priorities using the following
-   --  transformation: 255 - Ada Priority
-
-   --  Ada priorities are used as follows:
-
-   --  256is reserved for the VxWorks kernel
-   --  248 - 255  correspond to hardware interrupt levels 0 .. 7
-   --  247is a catchall default "interrupt" priority for signals,
-   -- allowing higher priority than normal tasks, but lower than
-   -- hardware priority levels.  Protected Object ceilings can
-   -- override these values.
-   --  246is used by the In

[Ada] Small cleanup in Eval_Integer_Literal

2021-10-20 Thread Pierre-Marie de Rodat via Gcc-patches
This removes an unreachable case in a nested predicate function as well
as trims down a verbose condition.  No functional changes.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_eval.ads (Check_Non_Static_Context): Update documentation.
* sem_eval.adb (In_Any_Integer_Context): Change parameter type,
adjust accordingly and remove unreachable case.
(Eval_Integer_Literal): Consider the node kind throughout and
trim down verbose condition.diff --git a/gcc/ada/sem_eval.adb b/gcc/ada/sem_eval.adb
--- a/gcc/ada/sem_eval.adb
+++ b/gcc/ada/sem_eval.adb
@@ -2845,7 +2845,7 @@ package body Sem_Eval is
--  the expander that do not correspond to static expressions.
 
procedure Eval_Integer_Literal (N : Node_Id) is
-  function In_Any_Integer_Context (Context : Node_Id) return Boolean;
+  function In_Any_Integer_Context (K : Node_Kind) return Boolean;
   --  If the literal is resolved with a specific type in a context where
   --  the expected type is Any_Integer, there are no range checks on the
   --  literal. By the time the literal is evaluated, it carries the type
@@ -2856,23 +2856,21 @@ package body Sem_Eval is
   -- In_Any_Integer_Context --
   
 
-  function In_Any_Integer_Context (Context : Node_Id) return Boolean is
+  function In_Any_Integer_Context (K : Node_Kind) return Boolean is
   begin
  --  Any_Integer also appears in digits specifications for real types,
  --  but those have bounds smaller that those of any integer base type,
  --  so we can safely ignore these cases.
 
- return
-   Nkind (Context) in N_Attribute_Definition_Clause
-| N_Attribute_Reference
-| N_Modular_Type_Definition
-| N_Number_Declaration
-| N_Signed_Integer_Type_Definition;
+ return K in N_Attribute_Definition_Clause
+   | N_Modular_Type_Definition
+   | N_Number_Declaration
+   | N_Signed_Integer_Type_Definition;
   end In_Any_Integer_Context;
 
   --  Local variables
 
-  Par : constant Node_Id   := Parent (N);
+  PK  : constant Node_Kind := Nkind (Parent (N));
   Typ : constant Entity_Id := Etype (N);
 
--  Start of processing for Eval_Integer_Literal
@@ -2890,12 +2888,11 @@ package body Sem_Eval is
   --  Check_Non_Static_Context on an expanded literal may lead to spurious
   --  and misleading warnings.
 
-  if (Nkind (Par) in N_Case_Expression_Alternative | N_If_Expression
-   or else Nkind (Par) not in N_Subexpr)
-and then (Nkind (Par) not in N_Case_Expression_Alternative
-   | N_If_Expression
-   or else Comes_From_Source (N))
-and then not In_Any_Integer_Context (Par)
+  if (PK not in N_Subexpr
+   or else (PK in N_Case_Expression_Alternative | N_If_Expression
+ and then
+Comes_From_Source (N)))
+and then not In_Any_Integer_Context (PK)
   then
  Check_Non_Static_Context (N);
   end if;


diff --git a/gcc/ada/sem_eval.ads b/gcc/ada/sem_eval.ads
--- a/gcc/ada/sem_eval.ads
+++ b/gcc/ada/sem_eval.ads
@@ -149,10 +149,9 @@ package Sem_Eval is
--
--  Note: most cases of non-static context checks are handled within
--  Sem_Eval itself, including all cases of expressions at the outer level
-   --  (i.e. those that are not a subexpression). Currently the only outside
-   --  customer for this procedure is Sem_Attr (because Eval_Attribute is
-   --  there). There is also one special case arising from ranges (see body of
-   --  Resolve_Range).
+   --  (i.e. those that are not a subexpression). The outside customers for
+   --  this procedure are Sem_Aggr, Sem_Attr (because Eval_Attribute is there)
+   --  and Sem_Res (for a special case arising from ranges, see Resolve_Range).
--
--  Note: this procedure is also called by GNATprove on real literals
--  that are not sub-expressions of static expressions, to convert them to




[Ada] Rewrite tests on Convention_Intrinsic

2021-10-20 Thread Pierre-Marie de Rodat via Gcc-patches
Testing for Convention_Intrinsic is not the proper way in order to spot
intrinsic subprograms, calling the predicate Is_Intrinsic_Subprogram is.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* gcc-interface/decl.c (gnat_to_gnu_entity) :
Replace test on Convention_Intrinsic with Is_Intrinsic_Subprogram.
(gnat_to_gnu_param): Likewise.
(gnat_to_gnu_subprog_type): Likewise.
* gcc-interface/trans.c (elaborate_all_entities_for_package): Ditto.diff --git a/gcc/ada/gcc-interface/decl.c b/gcc/ada/gcc-interface/decl.c
--- a/gcc/ada/gcc-interface/decl.c
+++ b/gcc/ada/gcc-interface/decl.c
@@ -3946,7 +3946,7 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition)
 	bool extern_flag
 	  = ((Is_Public (gnat_entity) && !definition)
 	 || imported_p
-	 || (Convention (gnat_entity) == Convention_Intrinsic
+	 || (Is_Intrinsic_Subprogram (gnat_entity)
 		 && Has_Pragma_Inline_Always (gnat_entity)));
 	tree gnu_param_list;
 
@@ -5366,7 +5366,7 @@ gnat_to_gnu_param (Entity_Id gnat_param, tree gnu_param_type, bool first,
   /* Builtins are expanded inline and there is no real call sequence involved.
  So the type expected by the underlying expander is always the type of the
  argument "as is".  */
-  if (Convention (gnat_subprog) == Convention_Intrinsic
+  if (Is_Intrinsic_Subprogram (gnat_subprog)
   && Present (Interface_Name (gnat_subprog)))
 mech = By_Copy;
 
@@ -5823,9 +5823,10 @@ gnat_to_gnu_subprog_type (Entity_Id gnat_subprog, bool definition,
 
   else
 {
-  /* For foreign convention subprograms, return System.Address as void *
-	 or equivalent.  Note that this comprises GCC builtins.  */
-  if (Has_Foreign_Convention (gnat_subprog)
+  /* For foreign convention/intrinsic subprograms, return System.Address
+	 as void * or equivalent; this comprises GCC builtins.  */
+  if ((Has_Foreign_Convention (gnat_subprog)
+	   || Is_Intrinsic_Subprogram (gnat_subprog))
 	  && Is_Descendant_Of_Address (Underlying_Type (gnat_return_type)))
 	gnu_return_type = ptr_type_node;
   else
@@ -5995,9 +5996,10 @@ gnat_to_gnu_subprog_type (Entity_Id gnat_subprog, bool definition,
 	{
 	  Entity_Id gnat_param_type = Etype (gnat_param);
 
-	  /* For foreign convention subprograms, pass System.Address as void *
-	 or equivalent.  Note that this comprises GCC builtins.  */
-	  if (Has_Foreign_Convention (gnat_subprog)
+	  /* For foreign convention/intrinsic subprograms, pass System.Address
+	 as void * or equivalent; this comprises GCC builtins.  */
+	  if ((Has_Foreign_Convention (gnat_subprog)
+	   || Is_Intrinsic_Subprogram (gnat_subprog))
 	  && Is_Descendant_Of_Address (Underlying_Type (gnat_param_type)))
 	gnu_param_type = ptr_type_node;
 	  else
@@ -6303,7 +6305,7 @@ gnat_to_gnu_subprog_type (Entity_Id gnat_subprog, bool definition,
 
   /* If this subprogram is expectedly bound to a GCC builtin, fetch the
 	 corresponding DECL node and check the parameter association.  */
-  if (Convention (gnat_subprog) == Convention_Intrinsic
+  if (Is_Intrinsic_Subprogram (gnat_subprog)
 	  && Present (Interface_Name (gnat_subprog)))
 	{
 	  tree gnu_ext_name = create_concat_name (gnat_subprog, NULL);


diff --git a/gcc/ada/gcc-interface/trans.c b/gcc/ada/gcc-interface/trans.c
--- a/gcc/ada/gcc-interface/trans.c
+++ b/gcc/ada/gcc-interface/trans.c
@@ -9079,7 +9079,7 @@ elaborate_all_entities_for_package (Entity_Id gnat_package)
 	continue;
 
   /* Skip stuff internal to the compiler.  */
-  if (Convention (gnat_entity) == Convention_Intrinsic)
+  if (Is_Intrinsic_Subprogram (gnat_entity))
 	continue;
   if (kind == E_Operator)
 	continue;




[Ada] Expose and use type-generic GCC atomic builtins

2021-10-20 Thread Pierre-Marie de Rodat via Gcc-patches
This exposes the newly added support for type-generic GCC atomic builtins
to the user through the System.Atomic_Primitives package, where a generic
version of the existing routines is added.  This also uses this support
in the implementation of the System.Atomic_Operations packages.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch12.adb (Analyze_Subprogram_Instantiation): Also propagate an
interface name on an intrinsic subprogram.  Remove obsolete comment.
* libgnat/s-atopri.ads (Atomic_Load): New generic intrinsic function
(Atomic_Load_8): Rewrite into instantiation.
(Atomic_Load_16): Likewise.
(Atomic_Load_32): Likewise.
(Atomic_Load_64): Likewise.
(Sync_Compare_And_Swap): New generic intrinsic function.
(Sync_Compare_And_Swap_8): Rewrite into instantiation.
(Sync_Compare_And_Swap_16): Likewise.
(Sync_Compare_And_Swap_32): Likewise.
(Sync_Compare_And_Swap_64): Likewise.
(Lock_Free_Read): New generic inline function.
(Lock_Free_Read_8): Rewrite into instantiation.
(Lock_Free_Read_16): Likewise.
(Lock_Free_Read_32): Likewise.
(Lock_Free_Read_64): Likewise.
(Lock_Free_Try_Write): New generic inline function.
(Lock_Free_Try_Write_8): Rewrite into instantiation.
(Lock_Free_Try_Write_16): Likewise.
(Lock_Free_Try_Write_32): Likewise.
(Lock_Free_Try_Write_64): Likewise.
* libgnat/s-atopri.adb (Lock_Free_Read): New function body.
(Lock_Free_Read_8): Delete.
(Lock_Free_Read_16): Likewise.
(Lock_Free_Read_32): Likewise.
(Lock_Free_Read_64): Likewise.
(Lock_Free_Try_Write): New function body.
(Lock_Free_Try_Write_8): Delete.
(Lock_Free_Try_Write_16): Likewise.
(Lock_Free_Try_Write_32): Likewise.
(Lock_Free_Try_Write_64): Likewise.
* libgnat/s-aoinar.adb (Atomic_Fetch_And_Add): Use type-generic GCC
atomic builtin and tidy up implementation.
(Atomic_Fetch_And_Subtract): Likewise.
* libgnat/s-aomoar.adb (Atomic_Fetch_And_Add): Likewise.
(Atomic_Fetch_And_Subtract): Likewise.
* libgnat/s-atopex.adb (Atomic_Exchange): Likewise.
(Atomic_Compare_And_Exchange): Likewise.

patch.diff.gz
Description: application/gzip


[Ada] Prevent use of an uninitialized AST field with universal integer

2021-10-20 Thread Pierre-Marie de Rodat via Gcc-patches
A temporary workaround needed by GNATprove after cleaning up the
handling of AST fields with universal integers.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_spark.adb (Expand_SPARK_N_Attribute_Reference): Guard
against equality of an uninitialized RM_Size field.diff --git a/gcc/ada/exp_spark.adb b/gcc/ada/exp_spark.adb
--- a/gcc/ada/exp_spark.adb
+++ b/gcc/ada/exp_spark.adb
@@ -451,6 +451,7 @@ package body Exp_SPARK is
Apply_Universal_Integer_Attribute_Checks (N);
 
if Present (Typ)
+ and then Known_RM_Size (Typ)
  and then RM_Size (Typ) = RM_Size (Standard_Long_Long_Integer)
then
   --  ??? This should rather be a range check, but this would




[Ada] Warning on nonmatching subtypes in fully conforming subprogram specs and bodies

2021-10-20 Thread Pierre-Marie de Rodat via Gcc-patches
When corresponding parameter subtypes or result subtypes denote
different declarations between the declaration and body of a subprogram,
but those are fully conforming, a warning will be issued indicating that
the subtypes come from different declarations. In the case of anonymous
access subtypes, the designated subtypes are checked. The warning is
conditioned on the switch -gnatw_p (for "pedantic checks"), introduced
as part of these changes.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_ch6.adb: Add with and use of Warnsw.
(Check_Conformance): Report a warning when subtypes or
designated subtypes of formal parameters or result subtypes
denote different declarations between the spec and body of the subprogram.
(Subprogram_Subtypes_Have_Same_Declaration): New function nested
within Check_Conformance that determines whether two subtype
entities referenced in a subprogram come from the same
declaration. Returns True immediately if the subprogram is in a
generic instantiation, or the subprogram is marked Is_Internal
or is declared in an internal (GNAT library) unit, or GNAT_Mode
is enabled, otherwise compares the nonlimited views of the
entities (or their designated subtypes' nonlimited views in the
anonymous access cases).
(Nonlimited_View_Of_Subtype): New function nested within
function Subprogram_Subtypes_Have_Same_Declaration that returns
Non_Limited_View of a type or subtype that is an incomplete or
class-wide type that comes from a limited of a
package (From_Limited_With is True for the entity), or returns
Full_View when the nonlimited view is an incomplete type.
Otherwise returns the entity passed in.
* warnsw.ads (Warn_On_Pedantic_Checks): New warning flag.
(type Warning_Record): New component Warn_On_Pedantic_Checks.
* warnsw.adb (All_Warnings): Set Warn_On_Pedantic_Checks from
parameter Setting.
(Restore_Warnings): Restore the value of the
Warn_On_Pedantic_Checks flag.
(Save_Warnings): Save the value of the Warn_On_Pedantic_Checks
flag.
(Set_Underscore_Warning_Switch): Add settings of the
Warn_On_Pedantic flag according to the switch ("-gnatw_p" vs.
"-gnatw_P").
* doc/gnat_ugn/building_executable_programs_with_gnat.rst: Add
documentation of new switches -gnatw_p and -gnatw_P (warnings
for pedantic checks).
* gnat_ugn.texi: Regenerate.
* usage.adb: Add Warn_On_Pedantic_Checks.diff --git a/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst b/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst
--- a/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst
+++ b/gcc/ada/doc/gnat_ugn/building_executable_programs_with_gnat.rst
@@ -3582,6 +3582,25 @@ of the pragma in the :title:`GNAT_Reference_manual`).
   ordering.
 
 
+.. index:: -gnatw_p  (gcc)
+
+:switch:`-gnatw_p`
+  *Activate warnings for pedantic checks.*
+
+  This switch activates warnings for the failure of certain pedantic checks.
+  The only case currently supported is a check that the subtype_marks given
+  for corresponding formal parameter and function results in a subprogram
+  declaration and its body denote the same subtype declaration. The default
+  is that such warnings are not given.
+
+.. index:: -gnatw_P  (gcc)
+
+:switch:`-gnatw_P`
+  *Suppress warnings for pedantic checks.*
+
+  This switch suppresses warnings on violations of pedantic checks.
+
+
 .. index:: -gnatwq  (gcc)
 .. index:: Parentheses, warnings
 


diff --git a/gcc/ada/gnat_ugn.texi b/gcc/ada/gnat_ugn.texi
--- a/gcc/ada/gnat_ugn.texi
+++ b/gcc/ada/gnat_ugn.texi
@@ -21,7 +21,7 @@
 
 @copying
 @quotation
-GNAT User's Guide for Native Platforms , Sep 28, 2021
+GNAT User's Guide for Native Platforms , Oct 19, 2021
 
 AdaCore
 
@@ -11800,6 +11800,34 @@ This switch suppresses warnings on cases of suspicious parameter
 ordering.
 @end table
 
+@geindex -gnatw_p (gcc)
+
+
+@table @asis
+
+@item @code{-gnatw_p}
+
+@emph{Activate warnings for pedantic checks.}
+
+This switch activates warnings for the failure of certain pedantic checks.
+The only case currently supported is a check that the subtype_marks given
+for corresponding formal parameter and function results in a subprogram
+declaration and its body denote the same subtype declaration. The default
+is that such warnings are not given.
+@end table
+
+@geindex -gnatw_P (gcc)
+
+
+@table @asis
+
+@item @code{-gnatw_P}
+
+@emph{Suppress warnings for pedantic checks.}
+
+This switch suppresses warnings on violations of pedantic checks.
+@end table
+
 @geindex -gnatwq (gcc)
 
 @geindex Parentheses


diff --git a/gcc/ada/sem_ch6.adb b/gcc/ada/sem_ch6.adb
--- a/gcc/ada/sem_ch6.adb
+++ b/gcc/ada/sem_ch6.adb
@@ -90,6 +90,7 @@ with Tbuild; use Tbuild;
 with Uintp;  use Uintp;
 with Urealp;   

[Ada] tech debt: Clean up Uint fields, such as Esize

2021-10-20 Thread Pierre-Marie de Rodat via Gcc-patches
Use No_Uint to indicate "unknown" or "not yet known" for various fields
whose type is Uint, instead of using Uint_0. Otherwise Uint_0 could be
ambiguous -- it could also mean "value is known, and is zero". This
patch does not fix all bugs in this area, but fixes most of them, and
adds assertions that should prevent new ones.

Work around unset sizes:
Previous changes caused several latent bugs to become crashes in e.g.
gnat-llvm, because some size-related fields aren't set. Previously, unset
fields defaulted to Uint_0, but now they default to No_Uint, which crashes
on most operations, such as "=" and ">". This patch turns them back into latent
bugs by checking for No_Uint explicitly.

Misc cleanup.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* atree.ads: Comment improvements. How is a "completely new
node" different from a "new node"? Document default values
corresponding to field types.
* exp_ch7.adb (Process_Tagged_Type_Declaration): Use
higher-level Scope_Depth instead of Scope_Depth_Value.  Remove
confusing comment: not clear what a "true" library level package
is.
* uintp.adb (Image_Out): Print No_Uint in a more readable way.
* gen_il-gen.adb, gen_il-gen-gen_entities.adb,
gen_il-gen-gen_nodes.adb, gen_il-types.ads: Tighten up the
subtypes of fields whose type is Uint, where possible; use
more-constrained subtypes such as Unat.
* einfo-utils.adb, einfo-utils.ads, exp_attr.adb,
exp_ch3.adb, exp_intr.adb, exp_unst.adb, exp_util.adb,
freeze.adb, repinfo.adb, sem.adb, sem_ch12.adb, sem_ch13.adb,
sem_ch3.adb, sem_ch8.adb, sem_util.adb, sprint.adb, treepr.adb:
No longer use Uint_0 to indicate "unknown" or "not yet known"
for various fields whose type is Uint. Use No_Uint for that,
except in a small number of legacy cases that cause test
failures. Protect many queries of such fields with calls to
Known_... functions. Improve comments.
* exp_aggr.adb: Likewise.
(Is_OK_Aggregate): Check whether Csiz is present.
(Aggr_Assignment_OK_For_Backend): Ensure we do not access an
uninitialized size.
* exp_strm.adb (Build_Elementary_Input_Call,
Build_Elementary_Write_Call): Check whether P_Size is present.
* cstand.adb: Leave Component_Size of Any_Composite unknown.
Similar for RM_Size of Standard_Exception_Type.  These should
not be used.
* einfo.ads: Comment improvements.
* exp_disp.ads: Minor.
* gen_il-internals.ads, gen_il-internals.adb: Minor.
* sinfo-utils.adb: Take advantage of full-coverage rules.
* types.h: Minor.

patch.diff.gz
Description: application/gzip


[Ada] Refine type of a counter function for record delta aggregate

2021-10-20 Thread Pierre-Marie de Rodat via Gcc-patches
Distance of a variant in the enclosing type declaration is never
negative. Code cleanup related to fix for boxes in record delta
aggregates; semantics is unaffected.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* sem_aggr.adb (Variant_Depth): Refine type from Integer to
Natural.diff --git a/gcc/ada/sem_aggr.adb b/gcc/ada/sem_aggr.adb
--- a/gcc/ada/sem_aggr.adb
+++ b/gcc/ada/sem_aggr.adb
@@ -3384,9 +3384,8 @@ package body Sem_Aggr is
   function Nested_In (V1 : Node_Id; V2 : Node_Id) return Boolean;
   --  Determine whether variant V1 is within variant V2
 
-  function Variant_Depth (N : Node_Id) return Integer;
-  --  Determine the distance of a variant to the enclosing type
-  --  declaration.
+  function Variant_Depth (N : Node_Id) return Natural;
+  --  Determine the distance of a variant to the enclosing type declaration
 
   
   --  Check_Variant --
@@ -3492,8 +3491,8 @@ package body Sem_Aggr is
   -- Variant_Depth --
   ---
 
-  function Variant_Depth (N : Node_Id) return Integer is
- Depth : Integer;
+  function Variant_Depth (N : Node_Id) return Natural is
+ Depth : Natural;
  Par   : Node_Id;
 
   begin




[Ada] Crash on object of protected type with defaulted access component

2021-10-20 Thread Pierre-Marie de Rodat via Gcc-patches
This patch corrects issues in the compiler whereby default initializing
a protected type component of an access type containing controlled parts
with an allocator causes a crash at compile-time at the point of an
object declaration of such protected type.

Tested on x86_64-pc-linux-gnu, committed on trunk

gcc/ada/

* exp_ch7.adb (Make_Final_Call): Detect expanded protected types
and use original protected type in order to calculate
appropriate finalization routine.diff --git a/gcc/ada/exp_ch7.adb b/gcc/ada/exp_ch7.adb
--- a/gcc/ada/exp_ch7.adb
+++ b/gcc/ada/exp_ch7.adb
@@ -8953,11 +8953,12 @@ package body Exp_Ch7 is
   Typ   : Entity_Id;
   Skip_Self : Boolean := False) return Node_Id
is
-  Loc: constant Source_Ptr := Sloc (Obj_Ref);
-  Atyp   : Entity_Id;
-  Fin_Id : Entity_Id := Empty;
-  Ref: Node_Id;
-  Utyp   : Entity_Id;
+  Loc  : constant Source_Ptr := Sloc (Obj_Ref);
+  Atyp : Entity_Id;
+  Prot_Typ : Entity_Id := Empty;
+  Fin_Id   : Entity_Id := Empty;
+  Ref  : Node_Id;
+  Utyp : Entity_Id;
 
begin
   Ref := Obj_Ref;
@@ -9035,6 +9036,19 @@ package body Exp_Ch7 is
  Set_Assignment_OK (Ref);
   end if;
 
+  --  Detect if Typ is a protected type or an expanded protected type and
+  --  store the relevant type within Prot_Typ for later processing.
+
+  if Is_Protected_Type (Typ) then
+ Prot_Typ := Typ;
+
+  elsif Ekind (Typ) = E_Record_Type
+and then Present (Corresponding_Concurrent_Type (Typ))
+and then Is_Protected_Type (Corresponding_Concurrent_Type (Typ))
+  then
+ Prot_Typ := Corresponding_Concurrent_Type (Typ);
+  end if;
+
   --  The underlying type may not be present due to a missing full view. In
   --  this case freezing did not take place and there is no [Deep_]Finalize
   --  primitive to call.
@@ -9080,7 +9094,7 @@ package body Exp_Ch7 is
   --  Protected types: these also require finalization even though they
   --  are not marked controlled explicitly.
 
-  elsif Is_Protected_Type (Typ) then
+  elsif Present (Prot_Typ) then
  --  Protected objects do not need to be finalized on restricted
  --  runtimes.
 
@@ -9090,7 +9104,7 @@ package body Exp_Ch7 is
  --  ??? Only handle the simple case for now. Will not support a record
  --  or array containing protected objects.
 
- elsif Is_Simple_Protected_Type (Typ) then
+ elsif Is_Simple_Protected_Type (Prot_Typ) then
 Fin_Id := RTE (RE_Finalize_Protection);
  else
 raise Program_Error;




  1   2   >