Re: [PATCH] IBM Z: Fix address of operands will never be NULL warnings

2021-11-02 Thread Andreas Krebbel via Gcc-patches
On 10/30/21 12:43, Stefan Schulze Frielinghaus wrote:
> Since a recent enhancement of -Waddress a couple of warnings are emitted
> and turned into errors during bootstrap:
> 
> gcc/config/s390/s390.md:12087:25: error: the address of 'operands' will never 
> be NULL [-Werror=address]
> 12087 |   "TARGET_HTM && operands != NULL
> build/gencondmd.c:59:12: note: 'operands' declared here
>59 | extern rtx operands[];
>   |^~~~
> 
> Fixed by removing those non-null checks.
> Bootstrapped and regtested on IBM Z.  Ok for mainline?
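
As a minimal stand-alone illustration of the diagnostic being silenced (not
taken from the patch itself):

extern int operands[];   /* stands in for gencondmd.c's "extern rtx operands[];" */

int
cond (void)
{
  /* The address of an array object can never be null, so this test is
     always true; the enhanced -Waddress warns about it, which is why the
     s390.md conditions drop their operands != NULL checks.  */
  return operands != 0;
}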
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.md ("*cc_to_int", "tabort", "*tabort_1",
>   "*tabort_1_plus"): Remove operands non-null check.

Ok. Thanks!

Andreas


[PATCH] [RFC][PR102768] aarch64: Add compiler support for Shadow Call Stack

2021-11-02 Thread Dan Li via Gcc-patches
Shadow Call Stack can be used to protect the return address of a
function at runtime, and clang already supports this feature[1].

To enable SCS in user mode, support beyond the compiler is also
required (as described in [2]). This patch only adds basic SCS
support on the compiler side, making it convenient for users to
enable SCS.

For the Linux kernel, only compiler support is required.

[1] https://clang.llvm.org/docs/ShadowCallStack.html
[2] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102768
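
As a rough sketch of what the instrumentation does (illustrative only, not
part of this patch; the x18-based sequence follows the clang scheme in [1]
and the scs_push/scs_pop patterns added below):

/* Built with -fsanitize=shadow-call-stack, a non-leaf function like this
   additionally saves and restores its return address on a separate
   shadow stack addressed by x18, roughly:

       str  x30, [x18], #8       // scs_push: spill LR to the shadow stack
       ... normal prologue, body and epilogue ...
       ldr  x30, [x18, #-8]!     // scs_pop: reload LR from the shadow stack

   so overwriting the return address on the normal stack cannot redirect
   the return.  */
void callee (void);

void
caller (void)
{
  callee ();
}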

gcc/c-family/ChangeLog:

* c-attribs.c (handle_no_sanitize_shadow_call_stack_attribute):

gcc/ChangeLog:

* config/aarch64/aarch64-protos.h (aarch64_shadow_call_stack_enabled):
* config/aarch64/aarch64.c (aarch64_shadow_call_stack_enabled):
(aarch64_expand_prologue):
(aarch64_expand_epilogue):
* config/aarch64/aarch64.h (TARGET_SUPPORT_SHADOW_CALL_STACK):
* config/aarch64/aarch64.md (scs_push):
(scs_pop):
* defaults.h (TARGET_SUPPORT_SHADOW_CALL_STACK):
* flag-types.h (enum sanitize_code):
* opts.c (finish_options):

Signed-off-by: Dan Li 
---
 gcc/c-family/c-attribs.c| 21 +
 gcc/config/aarch64/aarch64-protos.h |  1 +
 gcc/config/aarch64/aarch64.c| 27 +++
 gcc/config/aarch64/aarch64.h|  4 
 gcc/config/aarch64/aarch64.md   | 18 ++
 gcc/defaults.h  |  4 
 gcc/flag-types.h|  2 ++
 gcc/opts.c  |  6 ++
 8 files changed, 83 insertions(+)

diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c
index 007b928c54b..9b3a35c06bf 100644
--- a/gcc/c-family/c-attribs.c
+++ b/gcc/c-family/c-attribs.c
@@ -56,6 +56,8 @@ static tree handle_cold_attribute (tree *, tree, tree, int, 
bool *);
 static tree handle_no_sanitize_attribute (tree *, tree, tree, int, bool *);
 static tree handle_no_sanitize_address_attribute (tree *, tree, tree,
  int, bool *);
+static tree handle_no_sanitize_shadow_call_stack_attribute (tree *, tree,
+ tree, int, bool *);
 static tree handle_no_sanitize_thread_attribute (tree *, tree, tree,
 int, bool *);
 static tree handle_no_address_safety_analysis_attribute (tree *, tree, tree,
@@ -454,6 +456,10 @@ const struct attribute_spec c_common_attribute_table[] =
  handle_no_sanitize_attribute, NULL },
   { "no_sanitize_address",0, 0, true, false, false, false,
  handle_no_sanitize_address_attribute, NULL },
+  { "no_sanitize_shadow_call_stack",
+ 0, 0, true, false, false, false,
+ handle_no_sanitize_shadow_call_stack_attribute,
+ NULL },
   { "no_sanitize_thread", 0, 0, true, false, false, false,
  handle_no_sanitize_thread_attribute, NULL },
   { "no_sanitize_undefined",  0, 0, true, false, false, false,
@@ -1175,6 +1181,21 @@ handle_no_sanitize_address_attribute (tree *node, tree 
name, tree, int,
   return NULL_TREE;
 }
 
+/* Handle a "no_sanitize_shadow_call_stack" attribute; arguments as in
+   struct attribute_spec.handler.  */
+static tree
+handle_no_sanitize_shadow_call_stack_attribute (tree *node, tree name,
+ tree, int, bool *no_add_attrs)
+{
+  *no_add_attrs = true;
+  if (TREE_CODE (*node) != FUNCTION_DECL)
+warning (OPT_Wattributes, "%qE attribute ignored", name);
+  else
+add_no_sanitize_value (*node, SANITIZE_SHADOW_CALL_STACK);
+
+  return NULL_TREE;
+}
+
 /* Handle a "no_sanitize_thread" attribute; arguments as in
struct attribute_spec.handler.  */
 
diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 768e8fae136..150c015df21 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -893,6 +893,7 @@ void aarch64_register_pragmas (void);
 void aarch64_relayout_simd_types (void);
 void aarch64_reset_previous_fndecl (void);
 bool aarch64_return_address_signing_enabled (void);
+bool aarch64_shadow_call_stack_enabled (void);
 bool aarch64_bti_enabled (void);
 void aarch64_save_restore_target_globals (tree);
 void aarch64_addti_scratch_regs (rtx, rtx, rtx *,
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 699c105a42a..5a36a459f4e 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -79,6 +79,7 @@
 #include "tree-ssa-loop-niter.h"
 #include "fractional-cost.h"
 #include "rtlanal.h"
+#include "asan.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -7799,6 +7800,24 @@ aarch64_return_address_signing_enabled (void)
  && known_ge (cfun->machine->frame.reg_offset[LR_REGNUM], 0)));
 }
 
+/

Re: [PATCH 2/2] add -Wdangling-pointer [PR #63272]

2021-11-02 Thread Eric Gallager via Gcc-patches
On Mon, Nov 1, 2021 at 6:20 PM Martin Sebor via Gcc-patches
 wrote:
>
> Patch 2 in this series adds support for detecting the uses of
> dangling pointers: those to auto objects that have gone out of
> scope.  Like patch 1, to minimize false positives this detection
> is very simplistic.  However, thanks to the more deterministic
> nature of the problem (all local objects go out of scope) it is able
> to detect more instances of it.  The approach I used is to simply
> search the IL for clobbers that dominate uses of pointers to
> the clobbered objects.  If such a use is found that's not
> followed by a clobber of the same object the warning triggers.
> Similar to -Wuse-after-free, the new -Wdangling-pointer option
> has multiple levels: level 1 to detect unconditional uses and
> level 2 to flag conditional ones.  Unlike with -Wuse-after-free
> there is no use case for testing dangling pointers for
> equality, so there is no level 3.
>
> Tested on x86_64-linux and  by building Glibc and Binutils/GDB.
> It found no problems outside of the GCC test suite.
>
> As with the first patch in this series, the tests contain a number
> of xfails due to known limitations marked with pr??.  I'll
> open bugs for them before committing the patch if I don't resolve
> them first in a followup.
>
> Martin

So, I'd just like to take this chance to re-state my preference (as a
user) for having separately named options for warnings instead of a
single option with multiple levels, so that users can toggle one
without the other.  With the numerical levels, one can detect only
unconditional uses and not conditional ones by using level one, but
there is no way to do the opposite, i.e. detect conditional uses but
not unconditional ones.  I think a split like the one that exists
between -Wuninitialized and -Wmaybe-uninitialized would make sense here.
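
To make the level distinction concrete, a hypothetical pair of examples (not
taken from the patch or its testsuite):

int *p;

void
level1_case (void)
{
  {
    int x = 0;
    p = &x;
  }           /* x goes out of scope (is clobbered) here */
  *p = 1;     /* dangling use on every path: level 1 territory */
}

void
level2_case (int flag)
{
  {
    int y = 0;
    if (flag)
      p = &y;
  }
  if (flag)
    *p = 1;   /* dangling use only on some paths: level 2 territory */
}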

Eric


Re: [PATCH] libstdc++: Clear padding bits in atomic compare_exchange

2021-11-02 Thread Jakub Jelinek via Gcc-patches
On Mon, Nov 01, 2021 at 06:25:45PM -0700, Thomas Rodgers via Gcc-patches wrote:
> +template
> +  constexpr bool
> +  __maybe_has_padding()
> +  {
> +#if __has_builtin(__has_unique_object_representations)
> + return !__has_unique_object_representations(_Tp)
> +   && !is_floating_point<_Tp>::value;
> +#else
> + return true;
> +#endif

I'm not sure I understand the && !is_floating_point<_Tp>::value.
Yes, float and double will never have padding, but long double often
will, e.g. on x86 or ia64 (but e.g. not on ppc, s390x, etc.).
So, unless we want to play with numeric_limits, it should be either
just return !__has_unique_object_representations(_Tp);
or return !__has_unique_object_representations(_Tp)
  && (!is_floating_point<_Tp>::value
  || is_same<__remove_cv_t<_Tp>,long double>::value);
or, with numeric_limits, test numeric_limits<_Tp>::digits == 64
(but I'm sure Jonathan will not want to include such a header dependency
unless it is already included).
Or I can always provide a new __builtin_clear_padding_p ...
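
A quick stand-alone check of the long double point (illustrative, not part of
the patch; needs -std=c++17):

#include <iostream>
#include <type_traits>

int
main ()
{
  // GCC reports all three floating-point types as lacking unique object
  // representations (e.g. +0.0 and -0.0 compare equal with different bit
  // patterns), which is why the code above special-cases
  // is_floating_point; but x86's long double really does contain padding
  // bytes beyond its 80 value bits.
  std::cout << std::has_unique_object_representations_v<float> << ' '
            << std::has_unique_object_representations_v<double> << ' '
            << std::has_unique_object_representations_v<long double> << '\n';
  std::cout << "sizeof (long double) = " << sizeof (long double) << '\n';
}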

Jakub



[committed] openmp: Add testcase for threadprivate random access class iterators

2021-11-02 Thread Jakub Jelinek via Gcc-patches
Hi!

This adds a testcase for random access class iterators.  The diagnostics
can differ between templates and non-templates, as for some threadprivate
vars finish_id_expression replaces them with a call to their corresponding
wrapper, but I think that is not a big deal; we reject it in either case.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2021-11-02  Jakub Jelinek  

* g++.dg/gomp/loop-8.C: New test.

--- gcc/testsuite/g++.dg/gomp/loop-8.C.jj   2021-11-01 13:28:47.520972858 
+0100
+++ gcc/testsuite/g++.dg/gomp/loop-8.C  2021-11-01 13:23:29.676485591 +0100
@@ -0,0 +1,128 @@
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+
+template <typename T>
+class I
+{
+public:
+  typedef ptrdiff_t difference_type;
+  I ();
+  ~I ();
+  I (T *);
+  I (const I &);
+  T &operator * ();
+  T *operator -> ();
+  T &operator [] (const difference_type &) const;
+  I &operator = (const I &);
+  I &operator ++ ();
+  I operator ++ (int);
+  I &operator -- ();
+  I operator -- (int);
+  I &operator += (const difference_type &);
+  I &operator -= (const difference_type &);
+  I operator + (const difference_type &) const;
+  I operator - (const difference_type &) const;
+  template <typename S> friend bool operator == (I<S> &, I<S> &);
+  template <typename S> friend bool operator == (const I<S> &, const I<S> &);
+  template <typename S> friend bool operator < (I<S> &, I<S> &);
+  template <typename S> friend bool operator < (const I<S> &, const I<S> &);
+  template <typename S> friend bool operator <= (I<S> &, I<S> &);
+  template <typename S> friend bool operator <= (const I<S> &, const I<S> &);
+  template <typename S> friend bool operator > (I<S> &, I<S> &);
+  template <typename S> friend bool operator > (const I<S> &, const I<S> &);
+  template <typename S> friend bool operator >= (I<S> &, I<S> &);
+  template <typename S> friend bool operator >= (const I<S> &, const I<S> &);
+  template <typename S> friend typename I<S>::difference_type operator - (I<S> &, I<S> &);
+  template <typename S> friend typename I<S>::difference_type operator - (const I<S> &, const I<S> &);
+  template <typename S> friend I<S> operator + (typename I<S>::difference_type, const I<S> &);
+private:
+  T *p;
+};
+
+template <typename T> bool operator == (I<T> &, I<T> &);
+template <typename T> bool operator == (const I<T> &, const I<T> &);
+template <typename T> bool operator != (I<T> &, I<T> &);
+template <typename T> bool operator != (const I<T> &, const I<T> &);
+template <typename T> bool operator < (I<T> &, I<T> &);
+template <typename T> bool operator < (const I<T> &, const I<T> &);
+template <typename T> bool operator <= (I<T> &, I<T> &);
+template <typename T> bool operator <= (const I<T> &, const I<T> &);
+template <typename T> bool operator > (I<T> &, I<T> &);
+template <typename T> bool operator > (const I<T> &, const I<T> &);
+template <typename T> bool operator >= (I<T> &, I<T> &);
+template <typename T> bool operator >= (const I<T> &, const I<T> &);
+template <typename T> typename I<T>::difference_type operator - (I<T> &, I<T> &);
+template <typename T> typename I<T>::difference_type operator - (const I<T> &, const I<T> &);
+template <typename T> I<T> operator + (typename I<T>::difference_type, const I<T> &);
+
+extern I<int> i, j;
+#pragma omp threadprivate (i, j)
+extern I<int> k, l;
+#pragma omp threadprivate (k, l)
+I<int> k, l;
+
+void
+f1 (I<int> &x, I<int> &y)
+{
+  #pragma omp for collapse(2)
+  for (i = x; i < y; i++)	// { dg-error "expected iteration declaration or initialization" }
+    for (j = x; j < y; j++)
+      ;
+}
+
+void
+f2 (I<int> &x, I<int> &y)
+{
+  #pragma omp for collapse(2)
+  for (k = x; k < y; k++)	// { dg-error "expected iteration declaration or initialization" }
+    for (l = x; l < y; l++)
+      ;
+}
+
+template <int N>
+void
+f3 (I<int> &x, I<int> &y)
+{
+  #pragma omp for collapse(2)
+  for (i = x; i < y; i++)	// { dg-error "'i' is predetermined 'threadprivate' for 'private'" }
+    for (j = x; j < y; j++)	// { dg-error "'j' is predetermined 'threadprivate' for 'private'" }
+      ;
+}
+
+template <int N>
+void
+f4 (I<int> &x, I<int> &y)
+{
+  #pragma omp for collapse(2)
+  for (k = x; k < y; k++)	// { dg-error "'k' is predetermined 'threadprivate' for 'private'" }
+    for (l = x; l < y; l++)	// { dg-error "'l' is predetermined 'threadprivate' for 'private'" }
+      ;
+}
+
+template <typename T>
+void
+f5 (I<T> &x, I<T> &y)
+{
+  #pragma omp for collapse(2)	// { dg-error "expected iteration declaration or initialization" }
+  for (i = x; i < y; i++)	// { dg-error "'i' is predetermined 'threadprivate' for 'private'" }
+    for (j = x; j < y; j++)	// { dg-error "'j' is predetermined 'threadprivate' for 'private'" }
+      ;
+}
+
+template <typename T>
+void
+f6 (I<T> &x, I<T> &y)
+{
+  #pragma omp for collapse(2)	// { dg-error "expected iteration declaration or initialization" }
+  for (k = x; k < y; k++)	// { dg-error "'k' is predetermined 'threadprivate' for 'private'" }
+    for (l = x; l < y; l++)	// { dg-error "'l' is predetermined 'threadprivate' for 'private'" }
+      ;
+}
+
+void
+test (I<int> &x, I<int> &y)
+{
+  f3<0> (x, y);
+  f4<0> (x, y);
+  f5 (x, y);
+  f6 (x, y);
+}

Jakub



[PATCH] ia32: Disallow mode(V1TI) [PR103020]

2021-11-02 Thread Jakub Jelinek via Gcc-patches
Hi!

As discussed in the PR, TImode isn't supported for -m32 on x86 (for the same
reason as on most 32-bit targets, no support for > 2 * BITS_PER_WORD
precision integers), but since PR32280 V1TImode is allowed with -msse in SSE
regs, V2TImode with -mavx or V4TImode with -mavx512f.
typedef __int128 V __attribute__((vector_size ({16,32,64}));
will not work, neither typedef int I __attribute__((mode(TI)));
but mode(V1TI), mode(V2TI) etc. are accepted with a warning when those
ISAs are enabled.  But they are certainly not fully supported, for some
optabs maybe, but most of them will not.  And, veclower lowering those ops
to TImode scalar operations will not work either because TImode isn't
supported.

So, this patch keeps V1TImode etc. in VALID*_MODE macros so that we can use
it in certain instructions, but disallows it in
targetm.vector_mode_supported_p, so that we don't offer those modes to the
user as supported.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2021-11-02  Jakub Jelinek  

PR target/103020
* config/i386/i386.c (ix86_vector_mode_supported_p): Reject vector
modes with TImode inner mode if 32-bit.

* gcc.target/i386/pr103020.c: New test.

--- gcc/config/i386/i386.c.jj   2021-10-28 11:29:01.827722053 +0200
+++ gcc/config/i386/i386.c  2021-11-01 11:01:44.123587169 +0100
@@ -21989,6 +21989,10 @@ ix86_libgcc_floating_mode_supported_p (s
 static bool
 ix86_vector_mode_supported_p (machine_mode mode)
 {
+  /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be
+ either.  */
+  if (!TARGET_64BIT && GET_MODE_INNER (mode) == TImode)
+return false;
   if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
 return true;
   if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
--- gcc/testsuite/gcc.target/i386/pr103020.c.jj 2021-11-01 11:03:34.498017247 
+0100
+++ gcc/testsuite/gcc.target/i386/pr103020.c2021-11-01 11:11:10.794527161 
+0100
@@ -0,0 +1,11 @@
+/* PR target/103020 */
+/* { dg-do compile { target { ! int128 } } } */
+/* { dg-additional-options "-mavx512f" } */
+
+typedef int TI __attribute__((mode (TI))); /* { dg-error "unable to 
emulate" } */
+typedef int V1TI __attribute__((mode (V1TI))); /* { dg-error "unable to 
emulate" } */
+typedef int V2TI __attribute__((mode (V2TI))); /* { dg-error "unable to 
emulate" } */
+typedef int V4TI __attribute__((mode (V4TI))); /* { dg-error "unable to 
emulate" } */
+/* { dg-warning "is deprecated" "V1TI" { target *-*-* } .-3 } */
+/* { dg-warning "is deprecated" "V2TI" { target *-*-* } .-3 } */
+/* { dg-warning "is deprecated" "V4TI" { target *-*-* } .-3 } */

Jakub



Re: [PATCH] Add TSVC tests.

2021-11-02 Thread Martin Liška

On 10/29/21 11:36, Richard Biener wrote:

On Tue, Oct 26, 2021 at 5:27 PM Martin Liška  wrote:


On 10/26/21 10:13, Richard Biener wrote:

On Tue, Oct 19, 2021 at 8:49 AM Martin Liška  wrote:


On 10/18/21 12:08, Richard Biener wrote:

Can you please use a subdirectory for the sources, a "toplevel"
license.txt doesn't make much sense.  You can simply amend
vect.exp to process tsvc/*.c as well as sources so no need for an
extra .exp file.


Sure, it's a good idea and I've done that.



Is the license recognized as
compatible to the GPL as far as source distribution is concerned?


Yes: https://www.gnu.org/licenses/license-list.html#NCSA



Did you test the testcases on any non-x86 target?  (power/aarch64/arm)


Yes, I run the tests also on ppc64le-linux-gnu and aarch64-linux-gnu.

Thoughts?




Hey.


The overall setup looks fine to me.  There are quite some testcases
where there are no dg-final directives, some indicate in comments
that we do not expect vectorization - for those do we want to
add scan-tree-dump-not "loop vectorized" or so to make that clear?


In the updated version of the patch I added:
/* { dg-final { scan-tree-dump-not "vectorized \[1-9\] loops" "vect" } } */


For others do we want to add XFAILs so we'll notice when we improve
on TSVC?


What type of XFAILs do you mean?


Like

/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail
*-*-* } } } */


All right. The only limitation with that I see is that we can vectorize
more loops in the future and this pattern is still going to xfail.
We won't notice, right?



when the testcase looks for vectorization but we don't do that (yet).
For s1113 for example you added a scan-tree-dump-not but the comment
suggests we'd expect vectorization.


The comment comes from the original source of TSVC. I copied entire functions.




It looks like for example s124 is looking for IVOPTs rather
than vectorization?  There are testcases exercising float compares
(s124 is an example), vectorizing those likely requires a subset
of fast-math flags to allow if-conversion and masking, plus masking
is not available on all targets.  Is the intent to adjust testcase options
accordingly?


No, this is out of my scope, it has already taken me some time...


OK.



That said, I wonder whether it makes sense to initially only add
the parts having dg-final directives (that PASS or XFAIL), just
adding testcases for testing compile looks superfluous.

All of the testcases are dg-do compile, but vectorizer testcases
ideally would come with runtime verification.  I assume the
original TSVC provides this and as you include tscv.h in all
tests I suppose including a runtime harness would be possible, no?


All right, I'm also adding run-time checking.  It took me some time to make
the array initialization independent for all tests.  Plus I reduced the
number of iterations to 1/10 of the original.  That makes the tests quite fast.

What do you think about it now?


It looks nice now, but as said above some of the scan-tree-dump-not
should probably be xfailed scan-tree-dump, I was suggesting the
-not for the cases where vectorizing would be semantically wrong.


I see.



So I'd say OK with that change.


Installed that, thanks for review.

Martin



Thanks,
Richard.


Martin



Thanks,
Richard.


Thanks,
Martin



Richard.




Re: [PATCH] ia32: Disallow mode(V1TI) [PR103020]

2021-11-02 Thread Uros Bizjak via Gcc-patches
On Tue, Nov 2, 2021 at 9:41 AM Jakub Jelinek  wrote:
>
> Hi!
>
> As discussed in the PR, TImode isn't supported for -m32 on x86 (for the same
> reason as on most 32-bit targets, no support for > 2 * BITS_PER_WORD
> precision integers), but since PR32280 V1TImode is allowed with -msse in SSE
> regs, V2TImode with -mavx or V4TImode with -mavx512f.
> typedef __int128 V __attribute__((vector_size ({16,32,64}));
> will not work, neither typedef int I __attribute__((mode(TI)));
> but mode(V1TI), mode(V2TI) etc. are accepted with a warning when those
> ISAs are enabled.  But they are certainly not fully supported, for some
> optabs maybe, but most of them will not.  And, veclower lowering those ops
> to TImode scalar operations will not work either because TImode isn't
> supported.
>
> So, this patch keeps V1TImode etc. in VALID*_MODE macros so that we can use
> it in certain instructions, but disallows it in
> targetm.vector_mode_supported_p, so that we don't offer those modes to the
> user as supported.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2021-11-02  Jakub Jelinek  
>
> PR target/103020
> * config/i386/i386.c (ix86_vector_mode_supported_p): Reject vector
> modes with TImode inner mode if 32-bit.
>
> * gcc.target/i386/pr103020.c: New test.

OK.

Thanks,
Uros.

>
> --- gcc/config/i386/i386.c.jj   2021-10-28 11:29:01.827722053 +0200
> +++ gcc/config/i386/i386.c  2021-11-01 11:01:44.123587169 +0100
> @@ -21989,6 +21989,10 @@ ix86_libgcc_floating_mode_supported_p (s
>  static bool
>  ix86_vector_mode_supported_p (machine_mode mode)
>  {
> +  /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be
> + either.  */
> +  if (!TARGET_64BIT && GET_MODE_INNER (mode) == TImode)
> +return false;
>if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
>  return true;
>if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
> --- gcc/testsuite/gcc.target/i386/pr103020.c.jj 2021-11-01 11:03:34.498017247 
> +0100
> +++ gcc/testsuite/gcc.target/i386/pr103020.c2021-11-01 11:11:10.794527161 
> +0100
> @@ -0,0 +1,11 @@
> +/* PR target/103020 */
> +/* { dg-do compile { target { ! int128 } } } */
> +/* { dg-additional-options "-mavx512f" } */
> +
> +typedef int TI __attribute__((mode (TI))); /* { dg-error "unable to 
> emulate" } */
> +typedef int V1TI __attribute__((mode (V1TI))); /* { dg-error "unable to 
> emulate" } */
> +typedef int V2TI __attribute__((mode (V2TI))); /* { dg-error "unable to 
> emulate" } */
> +typedef int V4TI __attribute__((mode (V4TI))); /* { dg-error "unable to 
> emulate" } */
> +/* { dg-warning "is deprecated" "V1TI" { target *-*-* } .-3 } */
> +/* { dg-warning "is deprecated" "V2TI" { target *-*-* } .-3 } */
> +/* { dg-warning "is deprecated" "V4TI" { target *-*-* } .-3 } */
>
> Jakub
>


Re: [PATCH] libstdc++: Clear padding bits in atomic compare_exchange

2021-11-02 Thread Daniel Krügler via Gcc-patches
On Tue, Nov 2, 2021 at 02:26, Thomas Rodgers via Libstdc++
wrote:
>
> This should address Jonathan's feedback and adds support for atomic_ref
>

I'm wondering why __clear_padding doesn't refer to the computed __ptr
value in the case where __builtin_clear_padding is used?

Thanks,

- Daniel


Re: [PATCH 0/5] Fortran manual updates

2021-11-02 Thread Martin Liška

On 11/2/21 00:56, Sandra Loosemore wrote:

I'll wait a couple days before committing these patches, in case
anybody wants to give some feedback, especially on technical issues.


Hello.

I appreciate the work you did, but the patchset will cause quite a few
conflicts with the prepared Sphinx migration patch I've sent to the mailing
list :/
Anyway, I will rebase my patches.  For the future, are you planning to do a
similar documentation reorganization for the manual?  Based on discussion
with Gerald, I hope we can finish the transition before the end of this year.

Thank you,
Martin


Re: [PATCH] libsanitizer: Disable libbacktrace on sanitizer_platform_limits_freebsd.cpp

2021-11-02 Thread Richard Biener via Gcc-patches
On Sat, Oct 30, 2021 at 10:54 PM H.J. Lu via Gcc-patches
 wrote:
>
> sanitizer_platform_limits_freebsd.cpp must include  from the OS,
> not include/md5.h in GCC source tree which is included by libbacktrace
> support.  Disable libbacktrace on sanitizer_platform_limits_freebsd.cpp
> to avoid include/md5.h to restore bootstrap on FreeBSD.

Err, but that will just get complaints about disabled libbacktrace backtracking
(from within the sanitizer?)?

> PR bootstrap/102675
> * sanitizer_common/Makefile.am (AM_CXXFLAGS): Extract libbacktrace
> CXXFLAGS to ...
> (LIBBACKTRACE_CXXFLAGS): Here.  New.
> (sanitizer_common_files): Move sanitizer_platform_limits_freebsd.cpp
> to ...
> (sanitizer_common_files_no_libbacktrace): Here.  New.
> (AM_CXXFLAGS): Add $(LIBBACKTRACE_CXXFLAGS) for
> $(sanitizer_common_files).
> (libsanitizer_common_la_SOURCES): Add
> $(sanitizer_common_files_no_libbacktrace).
> * sanitizer_common/Makefile.in: Regenerate.
> ---
>  libsanitizer/sanitizer_common/Makefile.am | 24 +++-
>  libsanitizer/sanitizer_common/Makefile.in | 35 ---
>  2 files changed, 41 insertions(+), 18 deletions(-)
>
> diff --git a/libsanitizer/sanitizer_common/Makefile.am 
> b/libsanitizer/sanitizer_common/Makefile.am
> index d04f2d8bd16..0ea459c2b3a 100644
> --- a/libsanitizer/sanitizer_common/Makefile.am
> +++ b/libsanitizer/sanitizer_common/Makefile.am
> @@ -9,11 +9,12 @@ AM_CXXFLAGS += $(LIBSTDCXX_RAW_CXX_CXXFLAGS)
>  AM_CXXFLAGS += -std=gnu++14
>  AM_CXXFLAGS += $(EXTRA_CXXFLAGS)
>  if LIBBACKTRACE_SUPPORTED
> -AM_CXXFLAGS += -DSANITIZER_LIBBACKTRACE -DSANITIZER_CP_DEMANGLE \
> -  -I $(top_srcdir)/../libbacktrace \
> -  -I $(top_builddir)/libbacktrace \
> -  -I $(top_srcdir)/../include \
> -  -include $(top_srcdir)/libbacktrace/backtrace-rename.h
> +LIBBACKTRACE_CXXFLAGS = \
> +  -DSANITIZER_LIBBACKTRACE -DSANITIZER_CP_DEMANGLE \
> +  -I $(top_srcdir)/../libbacktrace \
> +  -I $(top_builddir)/libbacktrace \
> +  -I $(top_srcdir)/../include \
> +  -include $(top_srcdir)/libbacktrace/backtrace-rename.h
>  endif
>  AM_CCASFLAGS = $(EXTRA_ASFLAGS)
>  ACLOCAL_AMFLAGS = -I m4
> @@ -45,7 +46,6 @@ sanitizer_common_files = \
> sanitizer_netbsd.cpp \
> sanitizer_openbsd.cpp \
> sanitizer_persistent_allocator.cpp \
> -   sanitizer_platform_limits_freebsd.cpp \
> sanitizer_platform_limits_linux.cpp \
> sanitizer_platform_limits_openbsd.cpp \
> sanitizer_platform_limits_posix.cpp \
> @@ -81,8 +81,18 @@ sanitizer_common_files = \
> sanitizer_unwind_win.cpp \
> sanitizer_win.cpp
>
> +# Don't add $(LIBBACKTRACE_CXXFLAGS) for the following files:
> +# 1. sanitizer_platform_limits_freebsd.cpp must include  from
> +#the OS, not include/md5.h in GCC source tree.
> +sanitizer_common_files_no_libbacktrace = \
> +   sanitizer_platform_limits_freebsd.cpp
>
> -libsanitizer_common_la_SOURCES = $(sanitizer_common_files)
> +$(sanitizer_common_files:.cpp=.lo) \
> +  $(sanitizer_common_files:.cpp=.$(OBJEXT)): AM_CXXFLAGS += 
> $(LIBBACKTRACE_CXXFLAGS)
> +
> +libsanitizer_common_la_SOURCES = \
> +  $(sanitizer_common_files) \
> +  $(sanitizer_common_files_no_libbacktrace)
>  libsanitizer_common_la_LIBADD = $(SANITIZER_COMMON_TARGET_DEPENDENT_OBJECTS)
>  libsanitizer_common_la_DEPENDENCIES =  
> $(SANITIZER_COMMON_TARGET_DEPENDENT_OBJECTS)
>
> diff --git a/libsanitizer/sanitizer_common/Makefile.in 
> b/libsanitizer/sanitizer_common/Makefile.in
> index 2856894d62b..1433db2238b 100644
> --- a/libsanitizer/sanitizer_common/Makefile.in
> +++ b/libsanitizer/sanitizer_common/Makefile.in
> @@ -89,12 +89,6 @@ POST_UNINSTALL = :
>  build_triplet = @build@
>  host_triplet = @host@
>  target_triplet = @target@
> -@LIBBACKTRACE_SUPPORTED_TRUE@am__append_1 = -DSANITIZER_LIBBACKTRACE 
> -DSANITIZER_CP_DEMANGLE \
> -@LIBBACKTRACE_SUPPORTED_TRUE@ -I $(top_srcdir)/../libbacktrace \
> -@LIBBACKTRACE_SUPPORTED_TRUE@ -I $(top_builddir)/libbacktrace \
> -@LIBBACKTRACE_SUPPORTED_TRUE@ -I $(top_srcdir)/../include \
> -@LIBBACKTRACE_SUPPORTED_TRUE@ -include 
> $(top_srcdir)/libbacktrace/backtrace-rename.h
> -
>  subdir = sanitizer_common
>  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
>  am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
> @@ -131,7 +125,6 @@ am__objects_1 = sancov_flags.lo sanitizer_allocator.lo \
> sanitizer_mac.lo sanitizer_mac_libcdep.lo sanitizer_mutex.lo \
> sanitizer_netbsd.lo sanitizer_openbsd.lo \
> sanitizer_persistent_allocator.lo \
> -   sanitizer_platform_limits_freebsd.lo \
> sanitizer_platform_limits_linux.lo \
> sanitizer_platform_limits_openbsd.lo \
> sanitizer_platform_limits_posix.lo \
> @@ -153,7 +146,8 @@ am__objects_1 = sancov_flags.lo sanitizer_allocator.lo \
> sanitizer_thread_registry.lo sanitizer_

[PUSHED] update my email address

2021-11-02 Thread Andrew Burgess via Gcc-patches
Update my email address, and move myself into the Write After Approval
list - I've not done any ARC work for years now.

/

* MAINTAINERS (Reviewers, arc): Remove my entry.
(Write After Approval): Add an entry for myself.
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index fe56b2f647e..31f379fc004 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -257,7 +257,6 @@ check in changes outside of the parts of the compiler they 
maintain.
 
Reviewers
 
-arc port   Andrew Burgess  
 arc port   Claudiu Zissulescu  
 callgraph  Martin Liska
 callgraph  Martin Jambor   
@@ -344,6 +343,7 @@ Robert Bowdidge 

 Joel Brobecker 
 Dave Brolley   
 Christian Bruel
+Andrew Burgess 
 Kevin Buettner 
 Adam Butcher   
 Andrew Cagney  
-- 
2.25.4



aix: Add FAT library support for libffi for AIX

2021-11-02 Thread CHIGOT, CLEMENT via Gcc-patches
Even if GCC64 is able to bootstrap without libffi being a
FAT library on AIX, the tests for "-maix32" do not work
without it.

libffi/ChangeLog:
2021-10-21  Clément Chigot  

        * Makefile.am (tmake_file): Build and install AIX-style FAT
          libraries.
        * Makefile.in: Regenerate.
        * include/Makefile.in: Regenerate.
        * man/Makefile.in: Regenerate.
        * testsuite/Makefile.in: Regenerate.
        * configure (tmake_file): Substitute.
        * configure.ac: Regenerate.
        * configure.host (powerpc-*-aix*): Define tmake_file.
        * src/powerpc/t-aix: New file.

I've already made a PR to libffi itself in order to add the common part of 
this patch to it. But for now, it's still unmerged: 
https://github.com/libffi/libffi/pull/661. 

Clément

From 9722d209326b84d8817c1ae654190bdc6b546690 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Chigot?= 
Date: Thu, 21 Oct 2021 09:07:04 +0200
Subject: [PATCH] aix: Add FAT library support for libffi for AIX
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Even if GCC64 is able to bootstrap without libffi being a
FAT library on AIX, the tests for "-maix32" do not work
without it.

libffi/ChangeLog:
2021-10-21  Clément Chigot  

* Makefile.am (tmake_file): Build and install AIX-style FAT
  libraries.
* Makefile.in: Regenerate.
* include/Makefile.in: Regenerate.
* man/Makefile.in: Regenerate.
* testsuite/Makefile.in: Regenerate.
* configure (tmake_file): Substitute.
* configure.ac: Regenerate.
* configure.host (powerpc-*-aix*): Define tmake_file.
* src/powerpc/t-aix: New file.
---
 libffi/Makefile.am   |  3 +++
 libffi/Makefile.in   |  4 
 libffi/configure | 17 +++--
 libffi/configure.ac  | 12 
 libffi/configure.host|  2 ++
 libffi/include/Makefile.in   |  1 +
 libffi/man/Makefile.in   |  1 +
 libffi/src/powerpc/t-aix | 14 ++
 libffi/testsuite/Makefile.in |  1 +
 9 files changed, 53 insertions(+), 2 deletions(-)
 create mode 100644 libffi/src/powerpc/t-aix

diff --git a/libffi/Makefile.am b/libffi/Makefile.am
index 02e36176c67..c671444c57c 100644
--- a/libffi/Makefile.am
+++ b/libffi/Makefile.am
@@ -228,4 +228,7 @@ clean-recursive: clean-multi
 distclean-recursive: distclean-multi
 maintainer-clean-recursive: maintainer-clean-multi
 
+# target overrides
+-include $(tmake_file)
+
 include $(top_srcdir)/../multilib.am
diff --git a/libffi/Makefile.in b/libffi/Makefile.in
index 6ff0c67a779..86eb1104f94 100644
--- a/libffi/Makefile.in
+++ b/libffi/Makefile.in
@@ -439,6 +439,7 @@ target_alias = @target_alias@
 target_cpu = @target_cpu@
 target_os = @target_os@
 target_vendor = @target_vendor@
+tmake_file = @tmake_file@
 toolexecdir = @toolexecdir@
 toolexeclibdir = @toolexeclibdir@
 top_build_prefix = @top_build_prefix@
@@ -1931,6 +1932,9 @@ clean-recursive: clean-multi
 distclean-recursive: distclean-multi
 maintainer-clean-recursive: maintainer-clean-multi
 
+# target overrides
+-include $(tmake_file)
+
 # GNU Make needs to see an explicit $(MAKE) variable in the command it
 # runs to enable its job server during parallel builds.  Hence the
 # comments below.
diff --git a/libffi/configure b/libffi/configure
index 4bababb87f5..9550a0906eb 100755
--- a/libffi/configure
+++ b/libffi/configure
@@ -644,6 +644,7 @@ LIBFFI_BUILD_VERSIONED_SHLIB_FALSE
 LIBFFI_BUILD_VERSIONED_SHLIB_TRUE
 OPT_LDFLAGS
 SECTION_LDFLAGS
+tmake_file
 toolexeclibdir
 toolexecdir
 FFI_DEBUG_FALSE
@@ -11481,7 +11482,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11484 "configure"
+#line 11485 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -11587,7 +11588,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11590 "configure"
+#line 11591 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -16384,6 +16385,18 @@ esac
 
 
 
+# Conditionalize the makefile for this target machine.
+tmake_file_=
+for f in ${tmake_file}
+do
+  if test -f ${srcdir}/src/$TARGETDIR/$f
+  then
+tmake_file_="${tmake_file_} \$(srcdir)/src/$TARGETDIR/$f"
+  fi
+done
+tmake_file="${tmake_file_}"
+
+
 if test "${multilib}" = "yes"; then
   multilib_arg="--enable-multilib"
 else
diff --git a/libffi/configure.ac b/libffi/configure.ac
index bff5a1e7189..71777ea9b11 100644
--- a/libffi/configure.ac
+++ b/libffi/configure.ac
@@ -403,6 +403,18 @@ esac
 AC_SUBST(toolexecdir)
 AC_SUBST(toolexeclibdir)
 
+# Conditionalize the makefile for this target machine.
+tmake_file_=
+for f in ${tmake_file}
+do
+  if test -f ${srcdir}/src/$TARGETDIR/$f
+  then
+tmake_file_="${tmake_file_} \$(srcdir)/src/$TARGETDIR/$f"
+  fi
+done
+tmake_file="${tmake_file_}"
+AC_SUBST(tmake_file)
+
 if test "${multilib}"

Re: [PATCH 2/2]AArch64: Add better costing for vector constants and operations

2021-11-02 Thread Christophe Lyon via Gcc-patches
Hi Tamar,


On Fri, Oct 29, 2021 at 5:23 PM Richard Sandiford via Gcc-patches <
gcc-patches@gcc.gnu.org> wrote:

> Tamar Christina  writes:
> > Hi All,
> >
> > Attached is a new version that fixes the previous SVE fallouts in a new
> way.
> >
> > Ok for master?
>


Looks like you forgot to try building for arm* targets; your patch breaks
the build:
 gcc/config/arm/arm.c:1194:1: error: uninitialized const member
'vector_cost_table::movi'
[]

You probably need to initialize the new field for arm targets too.

Can you check?
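
For reference, the kind of addition needed mirrors the aarch64 hunks quoted
below; a simplified, self-contained sketch (not the actual arm.c tables):

/* Simplified stand-in for the vector entry of the cost table; the real
   structures live in the shared cost-table headers.  */
struct vector_cost_table
{
  const int alu;
  const int mult;
  const int movi;     /* new in this patch */
  const int dup;      /* new in this patch */
  const int extract;  /* new in this patch */
};

/* Each per-CPU table then has to supply values for the new members,
   along the lines of:  */
static const struct vector_cost_table example_vector_cost =
{
  1,  /* alu.  */
  4,  /* mult.  */
  1,  /* movi.  */
  2,  /* dup.  */
  2   /* extract.  */
};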

Thanks,

Christophe



>
> > Thanks,
> > Tamar
> >
> > --- inline copy of patch ---
> >
> >
> > diff --git a/gcc/config/aarch64/aarch64-cost-tables.h
> b/gcc/config/aarch64/aarch64-cost-tables.h
> > index
> dd2e7e7cbb13d24f0b51092270cd7e2d75fabf29..bb499a1eae62a145f1665d521f57c98b49ac5389
> 100644
> > --- a/gcc/config/aarch64/aarch64-cost-tables.h
> > +++ b/gcc/config/aarch64/aarch64-cost-tables.h
> > @@ -124,7 +124,10 @@ const struct cpu_cost_table qdf24xx_extra_costs =
> >/* Vector */
> >{
> >  COSTS_N_INSNS (1),  /* alu.  */
> > -COSTS_N_INSNS (4)   /* mult.  */
> > +COSTS_N_INSNS (4),  /* mult.  */
> > +COSTS_N_INSNS (1),  /* movi.  */
> > +COSTS_N_INSNS (2),  /* dup.  */
> > +COSTS_N_INSNS (2)   /* extract.  */
> >}
> >  };
> >
> > @@ -229,7 +232,10 @@ const struct cpu_cost_table thunderx_extra_costs =
> >/* Vector */
> >{
> >  COSTS_N_INSNS (1),   /* Alu.  */
> > -COSTS_N_INSNS (4)/* mult.  */
> > +COSTS_N_INSNS (4),   /* mult.  */
> > +COSTS_N_INSNS (1),   /* movi.  */
> > +COSTS_N_INSNS (2),   /* dup.  */
> > +COSTS_N_INSNS (2)/* extract.  */
> >}
> >  };
> >
> > @@ -333,7 +339,10 @@ const struct cpu_cost_table
> thunderx2t99_extra_costs =
> >/* Vector */
> >{
> >  COSTS_N_INSNS (1),   /* Alu.  */
> > -COSTS_N_INSNS (4)/* Mult.  */
> > +COSTS_N_INSNS (4),   /* Mult.  */
> > +COSTS_N_INSNS (1),   /* movi.  */
> > +COSTS_N_INSNS (2),   /* dup.  */
> > +COSTS_N_INSNS (2)/* extract.  */
> >}
> >  };
> >
> > @@ -437,7 +446,10 @@ const struct cpu_cost_table
> thunderx3t110_extra_costs =
> >/* Vector */
> >{
> >  COSTS_N_INSNS (1),   /* Alu.  */
> > -COSTS_N_INSNS (4)/* Mult.  */
> > +COSTS_N_INSNS (4),   /* Mult.  */
> > +COSTS_N_INSNS (1),   /* movi.  */
> > +COSTS_N_INSNS (2),   /* dup.  */
> > +COSTS_N_INSNS (2)/* extract.  */
> >}
> >  };
> >
> > @@ -542,7 +554,10 @@ const struct cpu_cost_table tsv110_extra_costs =
> >/* Vector */
> >{
> >  COSTS_N_INSNS (1),  /* alu.  */
> > -COSTS_N_INSNS (4)   /* mult.  */
> > +COSTS_N_INSNS (4),  /* mult.  */
> > +COSTS_N_INSNS (1),  /* movi.  */
> > +COSTS_N_INSNS (2),  /* dup.  */
> > +COSTS_N_INSNS (2)   /* extract.  */
> >}
> >  };
> >
> > @@ -646,7 +661,10 @@ const struct cpu_cost_table a64fx_extra_costs =
> >/* Vector */
> >{
> >  COSTS_N_INSNS (1),  /* alu.  */
> > -COSTS_N_INSNS (4)   /* mult.  */
> > +COSTS_N_INSNS (4),  /* mult.  */
> > +COSTS_N_INSNS (1),  /* movi.  */
> > +COSTS_N_INSNS (2),  /* dup.  */
> > +COSTS_N_INSNS (2)   /* extract.  */
> >}
> >  };
> >
> > diff --git a/gcc/config/aarch64/aarch64-simd.md
> b/gcc/config/aarch64/aarch64-simd.md
> > index
> 29f381728a3b3d28bcd6a1002ba398c8b87713d2..61c3d7e195c510da88aa513f99af5f76f4d696e7
> 100644
> > --- a/gcc/config/aarch64/aarch64-simd.md
> > +++ b/gcc/config/aarch64/aarch64-simd.md
> > @@ -74,12 +74,14 @@ (define_insn "aarch64_simd_dup"
> >  )
> >
> >  (define_insn "aarch64_simd_dup"
> > -  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
> > +  [(set (match_operand:VDQF_F16 0 "register_operand" "=w,w")
> >   (vec_duplicate:VDQF_F16
> > -   (match_operand: 1 "register_operand" "w")))]
> > +   (match_operand: 1 "register_operand" "w,r")))]
> >"TARGET_SIMD"
> > -  "dup\\t%0., %1.[0]"
> > -  [(set_attr "type" "neon_dup")]
> > +  "@
> > +   dup\\t%0., %1.[0]
> > +   dup\\t%0., %1"
> > +  [(set_attr "type" "neon_dup, neon_from_gp")]
> >  )
> >
> >  (define_insn "aarch64_dup_lane"
> > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> > index
> 699c105a42a613c06c462e2de686795279d85bc9..542fc874a4e224fb2cbe94e64eab590458fe935b
> 100644
> > --- a/gcc/config/aarch64/aarch64.c
> > +++ b/gcc/config/aarch64/aarch64.c
> > @@ -12705,7 +12705,7 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int
> outer ATTRIBUTE_UNUSED,
> >rtx op0, op1, op2;
> >const struct cpu_cost_table *extra_cost
> >  = aarch64_tune_params.insn_extra_cost;
> > -  int code = GET_CODE (x);
> > +  rtx_code code = GET_CODE (x);
> >scalar_int_mode int_mode;
> >
> >/* By default, assume that everything has equivalent cost to the
> > @@ -13466,8 +13466,7 @@ cost_plus:
> >
> >we must cost the explicit register move.  */
> >if (mode =

Re: [PATCH 02/21] Fix attribute bugs due to zicsr/zifencei

2021-11-02 Thread Kito Cheng
> diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
> index 225e5b259c1..1a786f31258 100644
> --- a/gcc/config/riscv/riscv.md
> +++ b/gcc/config/riscv/riscv.md
> @@ -1812,7 +1812,7 @@ (define_expand "clear_cache"
>
>  (define_insn "fence"
>[(unspec_volatile [(const_int 0)] UNSPECV_FENCE)]
> -  ""
> +  "TARGET_ZIFENCEI"
   "%|fence%-")

The fence instruction is included in the baseline ISA:
https://github.com/riscv/riscv-isa-manual/blob/master/src/rv32.tex#L1206


Re: [PATCH 01/21] Fix riscv_expand_block_move

2021-11-02 Thread Kito Cheng
IIRC this issue should be resolved?

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99314

On Sun, Oct 31, 2021 at 5:34 PM  wrote:
>
> From: linsinan1995 <47880367+linsinan1...@users.noreply.github.com>
>
> ---
>  gcc/config/riscv/riscv.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> index 6aef3d3a6cf..0529b6d60cd 100644
> --- a/gcc/config/riscv/riscv.c
> +++ b/gcc/config/riscv/riscv.c
> @@ -3491,7 +3491,7 @@ riscv_block_move_loop (rtx dest, rtx src, unsigned 
> HOST_WIDE_INT length,
>  bool
>  riscv_expand_block_move (rtx dest, rtx src, rtx length)
>  {
> -  if (CONST_INT_P (length))
> +  if (CONST_INT_P (length) && INTVAL (length) >= 0)
>  {
>unsigned HOST_WIDE_INT hwi_length = UINTVAL (length);
>unsigned HOST_WIDE_INT factor, align;
> --
> 2.25.1
>


Re: [PATCH 00/21] RISC-V: add gcc support for Scalar Cryptography v1.0.0-rc5

2021-11-02 Thread Kito Cheng
Hi Si-Yu:

Thanks for your patch!

Here is a general comment for this patch set: GCC requires a ChangeLog
in the commit log, and it seems that is not quite right in your commit
log; you can refer to other commit logs to see how a ChangeLog is
written.

The scalar cryptography extension does not seem to define builtins and
intrinsics within the spec, so I would prefer to add those builtins to
riscv-c-api-doc[1] before adding them to GCC, to make sure the LLVM and
GCC toolchains have a consistent interface.  For now I would like to
merge only the arch-string-related parts.

So could you send a V2 patch set without the builtins and md changes?


[1] https://github.com/riscv-non-isa/riscv-c-api-doc/blob/master/riscv-c-api.md

Thanks!






On Sun, Oct 31, 2021 at 5:34 PM  wrote:
>
> From: SiYu Wu 
>
> This patch adds GCC backend support for the RISC-V Scalar Cryptography
> Extension (k-ext), including the machine description, builtin defines and
> testcases for each of the k-ext subsets.
>
> A note about Zbkx: Zbkx should be implemented as part of bitmanip's Zbp,
> but since Zbp is not included in the bitmanip spec v1.0 and crypto's v1.0
> release will come earlier than bitmanip's next release, we implement it
> here for now.
>
> SiYu Wu (19):
>   [crypto]: add machine description for Zknd and Zkne
>   [crypto]: add builtins for Zknd and Zkne
>   [crypto]: add testcases for Zknd and Zkne
>   [crypto]: add machine description for Zknh
>   [crypto]: add builtins for Zknh
>   [crypto]: add testcases for Zknh
>   [crypto]: add machine description for Zksed
>   [crypto]: add builtins for Zksed
>   [crypto]: add testcases for Zksed
>   [crypto]: add machine description for Zksh
>   [crypto]: add builtins for Zksh
>   [crypto]: add testcases for Zksh
>   [crypto]: add option defines for Zkr and Zkt
>   [crypto]: add option defines for Zbkb, Zbkc and Zbkx
>   [crypto]: add implied defines of Zk, Zkn and Zks
>   change z* subset assert to allow "zk"
>   [crypto]: add machine description for Zbkx
>   [crypto]: add builtins for Zbkx
>   [crypto]: add testcases for Zbkx
>
> jiawei (1):
>   Fix attribute bugs due to zicsr/zifencei
>
> linsinan1995 (1):
>   Fix riscv_expand_block_move
>
>  gcc/common/config/riscv/riscv-common.c|  39 ++-
>  gcc/config/riscv/arch-canonicalize|  18 +-
>  gcc/config/riscv/crypto.md| 319 ++
>  gcc/config/riscv/riscv-builtins-crypto.def|  76 +
>  gcc/config/riscv/riscv-builtins.c |  25 ++
>  gcc/config/riscv/riscv-ftypes.def |   6 +
>  gcc/config/riscv/riscv-opts.h |  21 ++
>  gcc/config/riscv/riscv.c  |   2 +-
>  gcc/config/riscv/riscv.md |   4 +-
>  gcc/config/riscv/riscv.opt|   3 +
>  gcc/testsuite/gcc.target/riscv/Zbkx.c |  17 +
>  gcc/testsuite/gcc.target/riscv/Zknd-aes-01.c  |  15 +
>  gcc/testsuite/gcc.target/riscv/Zknd-aes-02.c  |  21 ++
>  gcc/testsuite/gcc.target/riscv/Zkne-aes-01.c  |  15 +
>  gcc/testsuite/gcc.target/riscv/Zkne-aes-02.c  |  27 ++
>  gcc/testsuite/gcc.target/riscv/Zknh-sha256.c  |  27 ++
>  .../gcc.target/riscv/Zknh-sha512-01.c |  40 +++
>  .../gcc.target/riscv/Zknh-sha512-02.c |  28 ++
>  gcc/testsuite/gcc.target/riscv/Zksed-sm4.c|  17 +
>  gcc/testsuite/gcc.target/riscv/Zksh-sm3.c |  15 +
>  20 files changed, 730 insertions(+), 5 deletions(-)
>  create mode 100644 gcc/config/riscv/crypto.md
>  create mode 100644 gcc/config/riscv/riscv-builtins-crypto.def
>  create mode 100644 gcc/testsuite/gcc.target/riscv/Zbkx.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/Zknd-aes-01.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/Zknd-aes-02.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/Zkne-aes-01.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/Zkne-aes-02.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/Zknh-sha256.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/Zknh-sha512-01.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/Zknh-sha512-02.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/Zksed-sm4.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/Zksh-sm3.c
>
> --
> 2.25.1
>


Re: [PATCH 4/6 V2] aarch64: Add machine modes for Neon vector-tuple types

2021-11-02 Thread Jonathan Wright via Gcc-patches
Hi,

Each of the comments on the previous version of the patch have been
addressed.

Ok for master?

Thanks,
Jonathan


From: Richard Sandiford 
Sent: 22 October 2021 16:13
To: Jonathan Wright 
Cc: gcc-patches@gcc.gnu.org ; Kyrylo Tkachov 

Subject: Re: [PATCH 4/6] aarch64: Add machine modes for Neon vector-tuple types 
 
Thanks a lot for doing this.

Jonathan Wright  writes:
> @@ -763,9 +839,16 @@ aarch64_lookup_simd_builtin_type (machine_mode mode,
>  return aarch64_simd_builtin_std_type (mode, q);
>  
>    for (i = 0; i < nelts; i++)
> -    if (aarch64_simd_types[i].mode == mode
> - && aarch64_simd_types[i].q == q)
> -  return aarch64_simd_types[i].itype;
> +    {
> +  if (aarch64_simd_types[i].mode == mode
> +   && aarch64_simd_types[i].q == q)
> + return aarch64_simd_types[i].itype;
> +  else if (aarch64_simd_tuple_types[i][0] != NULL_TREE)

Very minor (sorry for not noticing earlier), but: the “else” is
redundant here.

> + for (int j = 0; j < 3; j++)
> +   if (TYPE_MODE (aarch64_simd_tuple_types[i][j]) == mode
> +   && aarch64_simd_types[i].q == q)
> + return aarch64_simd_tuple_types[i][j];
> +    }
>  
>    return NULL_TREE;
>  }
> diff --git a/gcc/config/aarch64/aarch64-simd.md 
> b/gcc/config/aarch64/aarch64-simd.md
> index 
> 48eddf64e05afe3788abfa05141f6544a9323ea1..0aa185b67ff13d40c87db0449aec312929ff5387
>  100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -6636,162 +6636,165 @@
>  
>  ;; Patterns for vector struct loads and stores.
>  
> -(define_insn "aarch64_simd_ld2"
> -  [(set (match_operand:OI 0 "register_operand" "=w")
> - (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
> - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
> -    UNSPEC_LD2))]
> +(define_insn "aarch64_simd_ld2"
> +  [(set (match_operand:VSTRUCT_2Q 0 "register_operand" "=w")
> + (unspec:VSTRUCT_2Q [
> +   (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand" "Utv")]
> +   UNSPEC_LD2))]
>    "TARGET_SIMD"
>    "ld2\\t{%S0. - %T0.}, %1"
>    [(set_attr "type" "neon_load2_2reg")]
>  )
>  
> -(define_insn "aarch64_simd_ld2r"
> -  [(set (match_operand:OI 0 "register_operand" "=w")
> -   (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
> -   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
> -  UNSPEC_LD2_DUP))]
> +(define_insn "aarch64_simd_ld2r"
> +  [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
> + (unspec:VSTRUCT_2QD [
> +   (match_operand:VSTRUCT_2QD 1 "aarch64_simd_struct_operand" "Utv")]
> +  UNSPEC_LD2_DUP))]

Sorry again for missing this, but the ld2rs, ld3rs and ld4rs should
keep their BLKmode arguments, since they only access 2, 3 or 4
scalar memory elements.

> @@ -7515,10 +7605,10 @@
>  )
>  
>  (define_insn_and_split "aarch64_combinev16qi"
> -  [(set (match_operand:OI 0 "register_operand" "=w")
> - (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
> - (match_operand:V16QI 2 "register_operand" "w")]
> -    UNSPEC_CONCAT))]
> +  [(set (match_operand:V2x16QI 0 "register_operand" "=w")
> + (unspec:V2x16QI [(match_operand:V16QI 1 "register_operand" "w")
> +  (match_operand:V16QI 2 "register_operand" "w")]
> + UNSPEC_CONCAT))]

Just realised that we can now make this a vec_concat, since the
modes are finally self-consistent.

No need to do that though, either way is fine.

Looks good otherwise.

Richard


Re: [PATCH v3] AArch64: Improve GOT addressing

2021-11-02 Thread Richard Sandiford via Gcc-patches
Wilco Dijkstra  writes:
> ping
>
>
> From: Wilco Dijkstra
> Sent: 04 June 2021 14:44
> To: Richard Sandiford 
> Cc: Kyrylo Tkachov ; GCC Patches 
> 
> Subject: [PATCH v3] AArch64: Improve GOT addressing
>
> Hi Richard,
>
> This merges the v1 and v2 patches and removes the spurious MEM from
> ldr_got_small_si/di. This has been rebased after [1], and the performance
> gain has now doubled.
>
> [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-June/571708.html
>
> Improve GOT addressing by treating the instructions as a pair.  This reduces
> register pressure and improves code quality significantly.  SPECINT2017 
> improves
> by 0.6% with -fPIC and codesize is 0.73% smaller.  Perlbench has 0.9% smaller
> codesize, 1.5% fewer executed instructions and is 1.8% faster on Neoverse N1.
>
> Passes bootstrap and regress. OK for commit?
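
For context, the pattern being kept together is the usual small-PIC GOT
access (illustrative C and codegen sketch, not taken from the patch):

/* With -fPIC -mcmodel=small on AArch64, the address of a preemptible
   global is loaded from the GOT with an ADRP/LDR pair, roughly:

       adrp  x0, :got:foo
       ldr   x0, [x0, :got_lo12:foo]

   Keeping the pair as a single insn until after reload is what lets it
   be CSEd and rematerialized as a unit (and later relaxed).  */
extern int foo;

int *
addr_of_foo (void)
{
  return &foo;
}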

Looks like everyone agrees that
https://github.com/ARM-software/abi-aa/pull/106 should go in in some form,
so I think it's OK for GCC to keep the instructions together.  Some comments
on the implementation though.  (We might have covered these earlier,
sorry if so.)

- Why do we rewrite the constant moves after reload into ldr_got_small_sidi
  and ldr_got_small_?  Couldn't we just get the move patterns to
  output the sequence directly?

- I think we should leave out the rtx_costs part and deal with that
  separately.  This patch should just be about whether we emit two
  separate define_insns for the move or whether we keep a single one
  (to support relaxation).

Thanks,
Richard

>
> ChangeLog:
> 2021-06-04  Wilco Dijkstra  
>
> * config/aarch64/aarch64.md (movsi): Split GOT accesses after reload.
> (movdi): Likewise.
> (ldr_got_small_): Remove MEM and LO_SUM, emit ADRP+LDR GOT 
> sequence.
> (ldr_got_small_sidi): Likewise.
> * config/aarch64/aarch64.c (aarch64_load_symref_appropriately): Delay
> splitting of GOT accesses until after reload. Remove tmp_reg and MEM.
> (aarch64_print_operand): Correctly print got_lo12 in L specifier.
> (aarch64_rtx_costs): Set rematerialization cost for GOT accesses.
> (aarch64_mov_operand_p): Make GOT accesses valid move operands.
>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 
> 08245827daa3f8199b29031e754244c078f0f500..11ea33c70fb06194fadfe94322fdfa098e5320fc
>  100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -3615,6 +3615,14 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
>
>  case SYMBOL_SMALL_GOT_4G:
>{
> +   /* Use movdi for GOT accesses until after reload - this improves
> +  CSE and rematerialization.  */
> +   if (!reload_completed)
> + {
> +   emit_insn (gen_rtx_SET (dest, imm));
> +   return;
> + }
> +
>  /* In ILP32, the mode of dest can be either SImode or DImode,
> while the got entry is always of SImode size.  The mode of
> dest depends on how dest is used: if dest is assigned to a
> @@ -3624,34 +3632,21 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
> patterns here (two patterns for ILP32).  */
>
>  rtx insn;
> -   rtx mem;
> -   rtx tmp_reg = dest;
>  machine_mode mode = GET_MODE (dest);
>
> -   if (can_create_pseudo_p ())
> - tmp_reg = gen_reg_rtx (mode);
> -
> -   emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
>  if (mode == ptr_mode)
>{
>  if (mode == DImode)
> - insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
> + insn = gen_ldr_got_small_di (dest, imm);
>  else
> - insn = gen_ldr_got_small_si (dest, tmp_reg, imm);
> -
> -   mem = XVECEXP (SET_SRC (insn), 0, 0);
> + insn = gen_ldr_got_small_si (dest, imm);
>}
>  else
>{
>  gcc_assert (mode == Pmode);
> -
> -   insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
> -   mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
> +   insn = gen_ldr_got_small_sidi (dest, imm);
>}
>
> -   gcc_assert (MEM_P (mem));
> -   MEM_READONLY_P (mem) = 1;
> -   MEM_NOTRAP_P (mem) = 1;
>  emit_insn (insn);
>  return;
>}
> @@ -11019,7 +11014,7 @@ aarch64_print_operand (FILE *f, rtx x, int code)
>switch (aarch64_classify_symbolic_expression (x))
>  {
>  case SYMBOL_SMALL_GOT_4G:
> - asm_fprintf (asm_out_file, ":lo12:");
> + asm_fprintf (asm_out_file, ":got_lo12:");
>break;
>
>  case SYMBOL_SMALL_TLSGD:
> @@ -13452,6 +13447,12 @@ cost_plus:
>
>  case SYMBOL_REF:
>*cost = 0;
> +
> +  /* Use a separate remateralization cost for GOT accesses.  */
> +  if (aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC
> + && aarch64_classify_symbol (x, 0) == SYMBOL_SMALL_GOT_4G)
> +   *cost = COSTS_N_INSNS (1)

Re: [PATCH] Initial implementation of -Whomoglyph [PR preprocessor/103027]

2021-11-02 Thread Jakub Jelinek via Gcc-patches
On Mon, Nov 01, 2021 at 05:14:12PM -0400, David Malcolm via Gcc-patches wrote:
> Thoughts?

Resending my previously internally posted mail:

Thanks for working on this, but I'm afraid this is done at the wrong
location.

Consider attached testcases Whomoglyph1.C and Whomoglyph2.C.
On the Whomoglyph1.C testcase I'd expect a warning, because there is clear
confusion for the reader, something that isn't visible in the emacs, vim or
joe editors or on the terminal: when f3 uses the scope identifier, the
casual reader will expect that it uses N1::N2::scope, but there is no such
variable, only an N1::N2::ѕсоре that looks visually the same but has
different UTF-8 chars in it.  So name lookup will instead find N1::scope
and use that.
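
A hypothetical sketch of the Whomoglyph1.C situation (the actual testcase is
not reproduced here):

namespace N1 {
  int scope;                     // plain ASCII "scope"
  namespace N2 {
    int ѕсоре;                   // Cyrillic lookalikes, renders as "scope"
    int f3 () { return scope; }  // quietly binds to N1::scope instead
  }
}
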
But Whomoglyph2.C will emit warnings that are IMHO not appropriate; I believe
there is no confusion at all there.  E.g. in the f5/f6 case, for both C and
C++, it doesn't really matter how each of the functions names its own
parameter, as one can never access another function's parameter.  Ditto for
different namespaces, provided that both namespaces aren't searched in the
same name lookup, and similarly for classes etc.  So IMNSHO that warning
belongs in name lookup (cp/name-lookup.c for the C++ FE).
And another important thing is that most users don't really use Unicode in
identifiers; I bet over 99.9% of identifiers don't have any >= 0x80
characters in them, and even when people do use them, confusable identifiers
within the same lookup are far more unlikely still.  So I think we should
optimize for the common case, ASCII-only identifiers, and spend as little
compile time as possible on this stuff.

Another thing we've discussed on IRC is that the Unicode confusables.txt has
some canonicalizations that I think most users will not appreciate, or at
least not appreciate if it is warned by default.  In particular, the
canonicalizations from ASCII to ASCII:
0 -> O
1 -> l
I -> l
m -> rn
I think most users use monospace fonts when working on C/C++ etc. source
code, anything else ruins indentation (ok, except for projects that indent
everything by tabs only).  In monospace fonts, I think m can't be confused
with rn.  And in most monospace fonts, 0 and O and 1/l/I are also fairly
well distinguishable.  So, I think we should warn about f00 vs. fOO or
home vs. horne or f1 vs. fl vs. fI only with -Whomoglyph=2, a non-default
extra level, rather than by default.
But, given the way confusables.txt is generated, while it is probably
easy to undo the m -> rn canonicalizations (assume everything that maps
to rn actually maps to m), for the 0 vs. O or 1 vs. l vs. I case I'm afraid
trying to differentiate to what it actually maps will be harder.
So, my proposal would be by default not to warn for the 0 vs. O, 1 vs. l vs. I
and m vs. rn differences if neither of the identifiers contains any UTF-8
chars and do warn otherwise.

For spending as short compile time as possible on this, I think libcpp
knows well if a UTF-8 or UCN character has been encountered in an identifier
(it uses the libcpp/charset.c entrypoints then), so perhaps we could mark
the tokens with a new flag that stands for "identifier contains at least one
UTF-8 (non-ASCII) or UCN character.  Or if we don't, perhaps just quickly
scanning identifier for characters with MSB set might be tollerable too.
So, that for how to identify identifiers for which we should compute the
canonical homoglyph form at all.
As for how to store it: again, as we should IMHO optimize for the usual case
where even identifiers with UTF-8/UCN characters canonicalize to themselves,
I think the best representation would be not to waste a whole pointer on
each IDENTIFIER_NODE for it, but instead to reserve one bit on the
identifiers, "identifier has a canonical homoglyph version different from
itself", and have that bit mean we have some hash map etc. on the side that
maps it to the canonical identifier.
As for the actual uses of confusables.txt transformations, do you think
we need to work on UTF-32?  Can't it all be done on UTF-8 instead?  Let
whatever confusable.txt -> something.h generator we write prepare a decision
function that can use UTF-8 in both what to replace and certainly just a
UTF-8 string literal in what to replace it with.
Note, the above would mean we don't compute them for those 0 -> O, [1I] -> l
or m -> rn canonicalizations for the default -Whomoglyph mode.
Perhaps we can use some bit/flag on the C FE scopes or C++ namespaces
and on the identifiers we map to through the hash map.  On the scopes etc.
it would mean this scope has some identifiers in it that have homoglyph
alternatives and the homoglyph canonical forms have already been added to
wherever the name lookup can find them.  And on the canonical forms it would
perhaps stand for "this canonical form has any O, l or rn sequences in it".
And then during actual name lookup, if the current identifier doesn't have
the alt homoglyph canonicalization flag set and the scope doesn't have
the new bit set either, there would be nothing further to do.
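
(For illustration only, such a quick MSB scan could look like the sketch
below; the function name is made up and is not an existing libcpp entry
point.)

#include <stdbool.h>
#include <stddef.h>

/* Illustrative sketch, not existing libcpp/GCC code: return true if the
   identifier spelling STR of length LEN contains any byte with the most
   significant bit set, i.e. any non-ASCII (UTF-8) byte.  */
static bool
identifier_has_non_ascii_p (const unsigned char *str, size_t len)
{
  for (size_t i = 0; i < len; i++)
    if (str[i] & 0x80)
      return true;
  return false;
}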

Re: [PATCH] libsanitizer: Disable libbacktrace on sanitizer_platform_limits_freebsd.cpp

2021-11-02 Thread H.J. Lu via Gcc-patches
On Tue, Nov 2, 2021 at 2:14 AM Richard Biener
 wrote:
>
> On Sat, Oct 30, 2021 at 10:54 PM H.J. Lu via Gcc-patches
>  wrote:
> >
> > sanitizer_platform_limits_freebsd.cpp must include <md5.h> from the OS,
> > not include/md5.h in GCC source tree which is included by libbacktrace
> > support.  Disable libbacktrace on sanitizer_platform_limits_freebsd.cpp
> > to avoid include/md5.h to restore bootstrap on FreeBSD.
>
> Err, but that will just get complaints about disabled libbacktrace 
> backtracking
> (from within the sanitizer?)?

It disables libbacktrace backtracking only on one file,
sanitizer_platform_limits_freebsd.cpp,
which contains only data on FreeBSD.  I highly doubt that libbacktrace
backtracking is
used in it.

> > PR bootstrap/102675
> > * sanitizer_common/Makefile.am (AM_CXXFLAGS): Extract libbacktrace
> > CXXFLAGS to ...
> > (LIBBACKTRACE_CXXFLAGS): Here.  New.
> > (sanitizer_common_files): Move sanitizer_platform_limits_freebsd.cpp
> > to ...
> > (sanitizer_common_files_no_libbacktrace): Here.  New.
> > (AM_CXXFLAGS): Add $(LIBBACKTRACE_CXXFLAGS) for
> > $(sanitizer_common_files).
> > (libsanitizer_common_la_SOURCES): Add
> > $(sanitizer_common_files_no_libbacktrace).
> > * sanitizer_common/Makefile.in: Regenerate.
> > ---
> >  libsanitizer/sanitizer_common/Makefile.am | 24 +++-
> >  libsanitizer/sanitizer_common/Makefile.in | 35 ---
> >  2 files changed, 41 insertions(+), 18 deletions(-)
> >
> > diff --git a/libsanitizer/sanitizer_common/Makefile.am 
> > b/libsanitizer/sanitizer_common/Makefile.am
> > index d04f2d8bd16..0ea459c2b3a 100644
> > --- a/libsanitizer/sanitizer_common/Makefile.am
> > +++ b/libsanitizer/sanitizer_common/Makefile.am
> > @@ -9,11 +9,12 @@ AM_CXXFLAGS += $(LIBSTDCXX_RAW_CXX_CXXFLAGS)
> >  AM_CXXFLAGS += -std=gnu++14
> >  AM_CXXFLAGS += $(EXTRA_CXXFLAGS)
> >  if LIBBACKTRACE_SUPPORTED
> > -AM_CXXFLAGS += -DSANITIZER_LIBBACKTRACE -DSANITIZER_CP_DEMANGLE \
> > -  -I $(top_srcdir)/../libbacktrace \
> > -  -I $(top_builddir)/libbacktrace \
> > -  -I $(top_srcdir)/../include \
> > -  -include $(top_srcdir)/libbacktrace/backtrace-rename.h
> > +LIBBACKTRACE_CXXFLAGS = \
> > +  -DSANITIZER_LIBBACKTRACE -DSANITIZER_CP_DEMANGLE \
> > +  -I $(top_srcdir)/../libbacktrace \
> > +  -I $(top_builddir)/libbacktrace \
> > +  -I $(top_srcdir)/../include \
> > +  -include $(top_srcdir)/libbacktrace/backtrace-rename.h
> >  endif
> >  AM_CCASFLAGS = $(EXTRA_ASFLAGS)
> >  ACLOCAL_AMFLAGS = -I m4
> > @@ -45,7 +46,6 @@ sanitizer_common_files = \
> > sanitizer_netbsd.cpp \
> > sanitizer_openbsd.cpp \
> > sanitizer_persistent_allocator.cpp \
> > -   sanitizer_platform_limits_freebsd.cpp \
> > sanitizer_platform_limits_linux.cpp \
> > sanitizer_platform_limits_openbsd.cpp \
> > sanitizer_platform_limits_posix.cpp \
> > @@ -81,8 +81,18 @@ sanitizer_common_files = \
> > sanitizer_unwind_win.cpp \
> > sanitizer_win.cpp
> >
> > +# Don't add $(LIBBACKTRACE_CXXFLAGS) for the following files:
> > +# 1. sanitizer_platform_limits_freebsd.cpp must include <md5.h> from
> > +#the OS, not include/md5.h in GCC source tree.
> > +sanitizer_common_files_no_libbacktrace = \
> > +   sanitizer_platform_limits_freebsd.cpp
> >
> > -libsanitizer_common_la_SOURCES = $(sanitizer_common_files)
> > +$(sanitizer_common_files:.cpp=.lo) \
> > +  $(sanitizer_common_files:.cpp=.$(OBJEXT)): AM_CXXFLAGS += 
> > $(LIBBACKTRACE_CXXFLAGS)
> > +
> > +libsanitizer_common_la_SOURCES = \
> > +  $(sanitizer_common_files) \
> > +  $(sanitizer_common_files_no_libbacktrace)
> >  libsanitizer_common_la_LIBADD = 
> > $(SANITIZER_COMMON_TARGET_DEPENDENT_OBJECTS)
> >  libsanitizer_common_la_DEPENDENCIES =  
> > $(SANITIZER_COMMON_TARGET_DEPENDENT_OBJECTS)
> >
> > diff --git a/libsanitizer/sanitizer_common/Makefile.in 
> > b/libsanitizer/sanitizer_common/Makefile.in
> > index 2856894d62b..1433db2238b 100644
> > --- a/libsanitizer/sanitizer_common/Makefile.in
> > +++ b/libsanitizer/sanitizer_common/Makefile.in
> > @@ -89,12 +89,6 @@ POST_UNINSTALL = :
> >  build_triplet = @build@
> >  host_triplet = @host@
> >  target_triplet = @target@
> > -@LIBBACKTRACE_SUPPORTED_TRUE@am__append_1 = -DSANITIZER_LIBBACKTRACE 
> > -DSANITIZER_CP_DEMANGLE \
> > -@LIBBACKTRACE_SUPPORTED_TRUE@ -I $(top_srcdir)/../libbacktrace \
> > -@LIBBACKTRACE_SUPPORTED_TRUE@ -I $(top_builddir)/libbacktrace \
> > -@LIBBACKTRACE_SUPPORTED_TRUE@ -I $(top_srcdir)/../include \
> > -@LIBBACKTRACE_SUPPORTED_TRUE@ -include 
> > $(top_srcdir)/libbacktrace/backtrace-rename.h
> > -
> >  subdir = sanitizer_common
> >  ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
> >  am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
> > @@ -131,7 +125,6 @@ am__objects_1 = sancov_flags.lo sanitizer_allocator.lo \
> > sanitizer_mac.lo san

Re: [PATCH] Initial implementation of -Whomoglyph [PR preprocessor/103027]

2021-11-02 Thread Jakub Jelinek via Gcc-patches
On Tue, Nov 02, 2021 at 12:56:53PM +0100, Jakub Jelinek wrote:
> Consider attached testcases Whomoglyph1.C and Whomoglyph2.C.
> On Whomoglyph1.C testcase, I'd expect a warning, because there is a clear
> confusion for the reader, something that isn't visible in any of emacs, vim,
> joe editors or on the terminal, when f3 uses scope identifier, the casual
> reader will expect that it uses N1::N2::scope, but there is no such
> variable, only one N1::N2::ѕсоре that visually looks the same, but has
> different UTF-8 chars in it.  So, name lookup will instead find N1::scope
> and use that.
> But Whomoglyph2.C will emit warnings that are IMHO not appropriate,
> I believe there is no confusion at all there, e.g. for both C and C++,
> the f5/f6 case, it doesn't really matter how each of the function names its
> own parameter, one can never access another function's parameter.
> Ditto for different namespace provided that both namespaces aren't searched
> in the same name lookup, or similarly classes etc.
> So, IMNSHO that warning belongs to name-lookup (cp/name-lookup.c for the C++
> FE).
> And, another important thing is that most users don't really use unicode in
> identifiers, I bet over 99.9% of identifiers don't have any >= 0x80
> characters in it and even when people do use them, confusable identifiers
> during the same lookup are even far more unlikely.
> So, I think we should optimize for the common case, ASCII only identifiers
> and spend as little compile time as possible on this stuff.

If we keep doing it in the stringpool, then e.g. one couldn't
#include <zlib.h>
in a program with Russian/Ukrainian/Serbian etc. identifiers where some 
parameter
or automatic variable etc. in some function in that file is called
с (Cyrillic letter es), etc. just because in zlib.h one of the arguments
in one of the function prototypes is called c (latin small letter c).
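
(A contrived illustration of that concern; the declaration below is a
made-up stand-in for a real zlib.h prototype, not an actual zlib function:)

extern int stream_put_byte (int c);	/* hypothetical header prototype whose
					   parameter is Latin small letter c */

void
user_code (void)
{
  int с = 0;	/* Cyrillic letter es; it never appears in the same name
		   lookup as the header's "c" parameter */
  (void) с;
}

A stringpool-wide check would still pair these two spellings and warn, even
though no single name lookup ever sees both.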
I'd be afraid most of the users that actually want to use UTF-8 or UCNs in
their identifiers would then just need to disable this warning...

Jakub



Re: [PATCH] libsanitizer: Disable libbacktrace on sanitizer_platform_limits_freebsd.cpp

2021-11-02 Thread Jakub Jelinek via Gcc-patches
On Tue, Nov 02, 2021 at 05:02:12AM -0700, H.J. Lu wrote:
> > On Sat, Oct 30, 2021 at 10:54 PM H.J. Lu via Gcc-patches
> >  wrote:
> > >
> > > sanitizer_platform_limits_freebsd.cpp must include <md5.h> from the OS,
> > > not include/md5.h in GCC source tree which is included by libbacktrace
> > > support.  Disable libbacktrace on sanitizer_platform_limits_freebsd.cpp
> > > to avoid include/md5.h to restore bootstrap on FreeBSD.
> >
> > Err, but that will just get complaints about disabled libbacktrace 
> > backtracking
> > (from within the sanitizer?)?
> 
> It disables libbacktrace backtracking only on one file,
> sanitizer_platform_limits_freebsd.cpp,
> which contains only data on FreeBSD.  I highly doubt that libbacktrace
> backtracking is
> used in it.

include/md5.h is a header we have control over, can't we just add to it
something like:
#ifdef USE_SYSTEM_MD5
#include_next <md5.h>
#else
Current header content
#endif
and arrange for that macro to be -DUSE_SYSTEM_MD5 when building that
libsanitizer source file?

Jakub



Re: [PATCH] libsanitizer: Disable libbacktrace on sanitizer_platform_limits_freebsd.cpp

2021-11-02 Thread H.J. Lu via Gcc-patches
On Tue, Nov 2, 2021 at 5:11 AM Jakub Jelinek  wrote:
>
> On Tue, Nov 02, 2021 at 05:02:12AM -0700, H.J. Lu wrote:
> > > On Sat, Oct 30, 2021 at 10:54 PM H.J. Lu via Gcc-patches
> > >  wrote:
> > > >
> > > > sanitizer_platform_limits_freebsd.cpp must include <md5.h> from the OS,
> > > > not include/md5.h in GCC source tree which is included by libbacktrace
> > > > support.  Disable libbacktrace on sanitizer_platform_limits_freebsd.cpp
> > > > to avoid include/md5.h to restore bootstrap on FreeBSD.
> > >
> > > Err, but that will just get complaints about disabled libbacktrace 
> > > backtracking
> > > (from within the sanitizer?)?
> >
> > It disables libbacktrace backtracking only on one file,
> > sanitizer_platform_limits_freebsd.cpp,
> > which contains only data on FreeBSD.  I highly doubt that libbacktrace
> > backtracking is
> > used in it.
>
> include/md5.h is a header we have control over, can't we just add to it
> something like:
> #ifdef USE_SYSTEM_MD5
> #include_next <md5.h>
> #else
> Current header content
> #endif
> and arrange for that macro to be -DUSE_SYSTEM_MD5 when building that
> libsanitizer source file?
>

Gerald, can you try this suggestion?  You can add "#define USE_SYSTEM_MD5"
in sanitizer_platform_limits_freebsd.cpp for testing.


-- 
H.J.


Re: [PATCH 4/6 V2] aarch64: Add machine modes for Neon vector-tuple types

2021-11-02 Thread Richard Sandiford via Gcc-patches
Jonathan Wright  writes:
> Each of the comments on the previous version of the patch have been
> addressed.

Thanks.

I realise I was wrong with the vcombine thing: it's only vec_concat
for LE, not for BE.  Sorry for the screw-up.

The patch is OK with that part reverted to your original version.

Richard


[PATCH] PR fortran/91497 -- Silence conversion warnings for MIN1 and MAX1

2021-11-02 Thread Manfred Schwarb via Gcc-patches
Hi,

In addition to the patches of Steve Kargl for PR 91497:
The MIN1 and MAX1 intrinsics do explicit type conversions and should
be silenced too for -Wconversion and -Wconversion-extra.

Adjust testcase to only use *4 and *8 real types, provide a second
testcase for *10 and *16 precisions.

Regtested on Linux x86_64.

Signed-off-by: Manfred Schwarb 
2021-11-02  Manfred Schwarb  

gcc/testsuite/ChangeLog:

	PR fortran/91497
	* gfortran.dg/pr91497.f90: Adjust test to only use single and
	double precision. Add complex intrinsics.

--- a/gcc/testsuite/gfortran.dg/pr91497.f90
+++ b/gcc/testsuite/gfortran.dg/pr91497.f90
@@ -1,4 +1,4 @@
-! { dg-do compile { target { i?86-*-* x86_64-*-* } } }
+! { dg-do compile }
 ! { dg-options "-Wall" }
 ! Code contributed by Manfred Schwarb 
 ! PR fortran/91497
@@ -8,120 +8,120 @@
 !
 program foo

-  real*4 a,aa
-  real*8 b,bb
-  real*10 c,cc
-  real*16 d
-  integer*2 e,ee
-  integer*4 f,ff
-  integer*8 g,gg
+  real*4 a, aa
+  real*8 b, bb
+  integer*2 e, ee
+  integer*4 f, ff
+  integer*8 g, gg
+  complex(4) ww
+  complex(8) xx
   PARAMETER(a=3.1415927_4)
   PARAMETER(b=3.1415927_8)
-  PARAMETER(c=3.1415927_10)
-  PARAMETER(d=3.1415927_16)
   PARAMETER(e=123_2)
   PARAMETER(f=123_4)
   PARAMETER(g=123_8)

-  aa=REAL(b)
-  aa=REAL(c)
-  aa=REAL(d)
+  aa=REAL(b)! was: Change of value in conversion from 'REAL(8)' to 'REAL(4)'
   aa=REAL(e)
   aa=REAL(f)
   aa=REAL(g)
+  aa=REAL(b, kind=4)   ! was: Change of value in conversion from 'REAL(8)' to 'REAL(4)'
+  bb=REAL(a, kind=8)
+
   aa=FLOAT(f)
-  aa=FLOOR(b)
-  aa=FLOOR(c)
-  aa=FLOOR(d)
-  aa=CEILING(b)
-  aa=CEILING(c)
-  aa=CEILING(d)
-  !---unknown but documented type conversions:
-  !!aa=FLOATI(e)
-  !!aa=FLOATJ(f)
-  !!aa=FLOATK(g)
-  !---documentation is wrong for sngl:
-  aa=SNGL(c)
-  aa=SNGL(d)
-  bb=REAL(c, kind=8)
-  bb=REAL(d, kind=8)
-  bb=DBLE(c)
-  bb=DBLE(d)
   bb=DFLOAT(g)
-  bb=FLOOR(c)
-  bb=FLOOR(d)
-  bb=CEILING(c)
-  bb=CEILING(d)
-  cc=REAL(d, kind=10)
-  cc=FLOOR(d)
-  cc=CEILING(d)
-
-  aa=AINT(b)
-  aa=ANINT(b)
-  aa=AINT(c)
-  aa=ANINT(c)
-  aa=AINT(d)
-  aa=ANINT(d)
+  aa=SNGL(b)! was: Change of value in conversion from 'REAL(8)' to 'REAL(4)'
+  aa=AINT(a)
+  bb=AINT(b)
+  aa=AINT(b, kind=4)
   bb=DINT(b)
+  aa=ANINT(a)
+  bb=ANINT(b)
+  aa=ANINT(b, kind=4)
   bb=DNINT(b)
-
-  ee=INT(a, kind=2)
-  ee=NINT(a, kind=2)
-  ee=INT(b, kind=2)
-  ee=NINT(b, kind=2)
-  ee=INT(c, kind=2)
-  ee=NINT(c, kind=2)
-  ee=INT(d, kind=2)
-  ee=NINT(d, kind=2)
+  !---DEC type conversions (-fdec):
+  !!aa=FLOATI(e)
+  !!aa=FLOATJ(f)
+  !!aa=FLOATK(g)
+  aa=AMAX0(f, f)
+  aa=AMIN0(f, f)
+  aa=AMAX0(g, g)
+  aa=AMIN0(g, g)
+
+  ee=INT(a)
+  ee=INT(a, kind=2)! was: Change of value in conversion from 'REAL(4)' to 'INTEGER(2)'
+  ee=INT(b, kind=2)! was: Change of value in conversion from 'REAL(8)' to 'INTEGER(2)'
   ee=INT(f, kind=2)
   ee=INT(g, kind=2)
+  ff=INT(b)
+  ff=INT(a, kind=4)! was: Change of value in conversion from 'REAL(4)' to 'INTEGER(4)'
+  ff=INT(b, kind=4)! was: Change of value in conversion from 'REAL(8)' to 'INTEGER(4)'
+  ff=INT(f, kind=4)
+  ff=INT(g, kind=4)
+  gg=INT(a)
+  gg=INT(a, kind=8)! was: Change of value in conversion from 'REAL(4)' to 'INTEGER(8)'
+  gg=INT(b, kind=8)! was: Change of value in conversion from 'REAL(8)' to 'INTEGER(8)'
+  gg=INT(f, kind=8)
+  gg=INT(g, kind=8)
+
   ee=IFIX(a)
+  ff=IFIX(a)
+  gg=IFIX(a)
   ee=IDINT(b)
-  ee=IDNINT(b)
-  ee=INT2(a)
-  ee=INT2(b)
-  ee=INT2(c)
-  ee=INT2(d)
+  ff=IDINT(b)
+  gg=IDINT(b)
+  ee=INT2(a)! was: Change of value in conversion from 'REAL(4)' to 'INTEGER(2)'
+  ee=INT2(b)! was: Change of value in conversion from 'REAL(8)' to 'INTEGER(2)'
   ee=INT2(f)
   ee=INT2(g)
+  gg=INT8(a)! was: Change of value in conversion from 'REAL(4)' to 'INTEGER(8)'
+  gg=INT8(b)! was: Change of value in conversion from 'REAL(8)' to 'INTEGER(8)'
+  gg=INT8(f)
+  gg=INT8(g)
+
+  ff=FLOOR(b)
+  ee=FLOOR(b, kind=2)
+  ff=FLOOR(b, kind=4)
+  gg=FLOOR(b, kind=8)
+  ff=CEILING(b)
+  ee=CEILING(b, kind=2)
+  ff=CEILING(b, kind=4)
+  gg=CEILING(b, kind=8)
+  ff=MAX1(a, a)! was: Change of value in conversion from 'REAL(4)' to 'INTEGER(4)'
+  ff=MIN1(a, a)! was: Change of value in conversion from 'REAL(4)' to 'INTEGER(4)'
+  gg=MAX1(b, b)! was: Change of value in conversion from 'REAL(8)' to 'INTEGER(4)'
+  gg=MIN1(b, b)! was: Change of value in conversion from 'REAL(8)' to 'INTEGER(4)'

Re: [PATCH] [RFC][PR102768] aarch64: Add compiler support for Shadow Call Stack

2021-11-02 Thread Szabolcs Nagy via Gcc-patches
The 11/02/2021 00:06, Dan Li via Gcc-patches wrote:
> Shadow Call Stack can be used to protect the return address of a
> function at runtime, and clang already supports this feature[1].
> 
> To enable SCS in user mode, in addition to compiler, other support
> is also required (as described in [2]). This patch only adds basic
> support for SCS from the compiler side, and provides convenience
> for users to enable SCS.
> 
> For linux kernel, only the support of the compiler is required.
> 
> [1] https://clang.llvm.org/docs/ShadowCallStack.html
> [2] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102768

i'm not a gcc maintainer, but i prefer such feature
to be in upstream gcc instead of in a plugin.

it will require update to the documentation:

which should mention that it depends on -ffixed-x18
(probably that should be enforced too) which is an
important abi issue: functions following the normal
pcs can clobber x18 and break scs.

and that there is no unwinder support.

the abi issue means it is unlikely to be useful in
linux user space (even if libc and unwinder support
is implemented), but it can be still useful in
freestanding code such as the linux kernel.

thanks.

> 
> gcc/c-family/ChangeLog:
> 
>   * c-attribs.c (handle_no_sanitize_shadow_call_stack_attribute):
> 
> gcc/ChangeLog:
> 
>   * config/aarch64/aarch64-protos.h (aarch64_shadow_call_stack_enabled):
>   * config/aarch64/aarch64.c (aarch64_shadow_call_stack_enabled):
>   (aarch64_expand_prologue):
>   (aarch64_expand_epilogue):
>   * config/aarch64/aarch64.h (TARGET_SUPPORT_SHADOW_CALL_STACK):
>   * config/aarch64/aarch64.md (scs_push):
>   (scs_pop):
>   * defaults.h (TARGET_SUPPORT_SHADOW_CALL_STACK):
>   * flag-types.h (enum sanitize_code):
>   * opts.c (finish_options):
> 
> Signed-off-by: Dan Li 
> ---
>  gcc/c-family/c-attribs.c| 21 +
>  gcc/config/aarch64/aarch64-protos.h |  1 +
>  gcc/config/aarch64/aarch64.c| 27 +++
>  gcc/config/aarch64/aarch64.h|  4 
>  gcc/config/aarch64/aarch64.md   | 18 ++
>  gcc/defaults.h  |  4 
>  gcc/flag-types.h|  2 ++
>  gcc/opts.c  |  6 ++
>  8 files changed, 83 insertions(+)
> 
> diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c
> index 007b928c54b..9b3a35c06bf 100644
> --- a/gcc/c-family/c-attribs.c
> +++ b/gcc/c-family/c-attribs.c
> @@ -56,6 +56,8 @@ static tree handle_cold_attribute (tree *, tree, tree, int, 
> bool *);
>  static tree handle_no_sanitize_attribute (tree *, tree, tree, int, bool *);
>  static tree handle_no_sanitize_address_attribute (tree *, tree, tree,
> int, bool *);
> +static tree handle_no_sanitize_shadow_call_stack_attribute (tree *, tree,
> +   tree, int, bool *);
>  static tree handle_no_sanitize_thread_attribute (tree *, tree, tree,
>int, bool *);
>  static tree handle_no_address_safety_analysis_attribute (tree *, tree, tree,
> @@ -454,6 +456,10 @@ const struct attribute_spec c_common_attribute_table[] =
> handle_no_sanitize_attribute, NULL },
>{ "no_sanitize_address",0, 0, true, false, false, false,
> handle_no_sanitize_address_attribute, NULL },
> +  { "no_sanitize_shadow_call_stack",
> +   0, 0, true, false, false, false,
> +   handle_no_sanitize_shadow_call_stack_attribute,
> +   NULL },
>{ "no_sanitize_thread", 0, 0, true, false, false, false,
> handle_no_sanitize_thread_attribute, NULL },
>{ "no_sanitize_undefined",  0, 0, true, false, false, false,
> @@ -1175,6 +1181,21 @@ handle_no_sanitize_address_attribute (tree *node, tree 
> name, tree, int,
>return NULL_TREE;
>  }
>  
> +/* Handle a "no_sanitize_shadow_call_stack" attribute; arguments as in
> +   struct attribute_spec.handler.  */
> +static tree
> +handle_no_sanitize_shadow_call_stack_attribute (tree *node, tree name,
> +   tree, int, bool *no_add_attrs)
> +{
> +  *no_add_attrs = true;
> +  if (TREE_CODE (*node) != FUNCTION_DECL)
> +warning (OPT_Wattributes, "%qE attribute ignored", name);
> +  else
> +add_no_sanitize_value (*node, SANITIZE_SHADOW_CALL_STACK);
> +
> +  return NULL_TREE;
> +}
> +
>  /* Handle a "no_sanitize_thread" attribute; arguments as in
> struct attribute_spec.handler.  */
>  
> diff --git a/gcc/config/aarch64/aarch64-protos.h 
> b/gcc/config/aarch64/aarch64-protos.h
> index 768e8fae136..150c015df21 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -893,6 +893,7 @@ void aarch64_register_pragmas (void);
>  void aarch64_relayout_simd_types (void);
>  void aarch64_reset_

Re: [PATCH] Add -fopt-builtin optimization option

2021-11-02 Thread Richard Biener via Gcc-patches
On Sun, Oct 31, 2021 at 11:13 AM Keith Packard via Gcc-patches
 wrote:
>
> This option (enabled by default) controls optimizations which convert
> a sequence of operations into an equivalent sequence that includes
> calls to builtin functions. Typical cases here are code which matches
> memcpy, calloc, sincos.
>
> The -ftree-loop-distribute-patterns flag only covers converting loops
> into builtin calls, not numerous other places where knowledge of
> builtin function semantics changes the generated code.
>
> The goal is to allow built-in functions to be declared by the compiler
> and used directly by the application, but to disable optimizations
> which create new calls to them, and to allow this optimization
> behavior to be changed for individual functions by decorating the
> function definition like this:
>
> void
> __attribute__((optimize("no-opt-builtin")))
> sincos(double x, double *s, double *c)
> {
> *s = sin(x);
> *c = cos(x);
> }
>
> This also avoids converting loops into library calls like this:
>
> void *
> __attribute__((optimize("no-opt-builtin")))
> memcpy(void *__restrict__ dst, const void *__restrict__ src, size_t n)
> {
> char *d = dst;
> const char *s = src;
>
> while (n--)
> *d++ = *s++;
> return dst;
> }
>
> As well as disabling analysis of memory lifetimes around free as in
> this example:
>
> void *
> __attribute__((optimize("no-opt-builtin")))
> erase_and_free(void *ptr)
> {
> memset(ptr, '\0', malloc_usable_size(ptr));
> free(ptr);
> }
>
> Clang has a more sophisticated version of this mechanism which
> can disable all builtins, or disable a specific builtin:
>
> double
> __attribute__((no_builtin("exp2")))
> exp2(double x)
> {
> return pow (2.0, x);
> }

I don't think it reliably works the way you implement it.  It's also having
more side-effects than what you document, in particular

  pow (2.0, x);

will now clobber and use global memory (besides errno).

I think you may want to instead change builtin_decl_implicit
to avoid code-generating a specific builtin.

Generally we'd also want sth like the clang attribute and _not_
use optimize("") for this or a global flag_*, so the behavior can
be more readily encoded in the IL.  In fact a flag on the call
statement could be added to denote the desired effect on it.

I also don't see the advantage compared to -fno-builtin[-foo].
Declaring the function should be something that's already done.

Richard.

> Signed-off-by: Keith Packard 
> ---
>  gcc/builtins.c   | 6 ++
>  gcc/common.opt   | 4 
>  gcc/gimple.c | 3 +++
>  gcc/tree-loop-distribution.c | 2 ++
>  4 files changed, 15 insertions(+)
>
> diff --git a/gcc/builtins.c b/gcc/builtins.c
> index 7d0f61fc98b..7aae57deab5 100644
> --- a/gcc/builtins.c
> +++ b/gcc/builtins.c
> @@ -1922,6 +1922,9 @@ mathfn_built_in_2 (tree type, combined_fn fn)
>built_in_function fcodef64x = END_BUILTINS;
>built_in_function fcodef128x = END_BUILTINS;
>
> +  if (flag_no_opt_builtin)
> +return END_BUILTINS;
> +
>switch (fn)
>  {
>  #define SEQ_OF_CASE_MATHFN \
> @@ -2125,6 +2128,9 @@ mathfn_built_in_type (combined_fn fn)
>case CFN_BUILT_IN_##MATHFN##L_R: \
>  return long_double_type_node;
>
> +  if (flag_no_opt_builtin)
> +return NULL_TREE;
> +
>switch (fn)
>  {
>  SEQ_OF_CASE_MATHFN
> diff --git a/gcc/common.opt b/gcc/common.opt
> index eeba1a727f2..d6111cc776a 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -2142,6 +2142,10 @@ fomit-frame-pointer
>  Common Var(flag_omit_frame_pointer) Optimization
>  When possible do not generate stack frames.
>
> +fopt-builtin
> +Common Var(flag_no_opt_builtin, 0) Optimization
> +Match code sequences equivalent to builtin functions
> +
>  fopt-info
>  Common Var(flag_opt_info) Optimization
>  Enable all optimization info dumps on stderr.
> diff --git a/gcc/gimple.c b/gcc/gimple.c
> index 22dd6417d19..5b82b9409c0 100644
> --- a/gcc/gimple.c
> +++ b/gcc/gimple.c
> @@ -2790,6 +2790,9 @@ gimple_builtin_call_types_compatible_p (const gimple 
> *stmt, tree fndecl)
>  {
>gcc_checking_assert (DECL_BUILT_IN_CLASS (fndecl) != NOT_BUILT_IN);
>
> +  if (flag_no_opt_builtin)
> +return false;
> +
>tree ret = gimple_call_lhs (stmt);
>if (ret
>&& !useless_type_conversion_p (TREE_TYPE (ret),
> diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
> index 583c01a42d8..43f22a3c7ce 100644
> --- a/gcc/tree-loop-distribution.c
> +++ b/gcc/tree-loop-distribution.c
> @@ -1859,6 +1859,7 @@ loop_distribution::classify_partition (loop_p loop,
>
>/* Perform general partition disqualification for builtins.  */
>if

Building GNU Arm Embedded Toolchain for macOS/arm64

2021-11-02 Thread Romain Goyet via Gcc-patches
Hello,

Arm distributes pre-built versions of GCC that target bare-metal Cortex-M
devices at
https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-rm

They offer a source release as well as pre-built binaries for Linux, macOS
and Windows.

The macOS version is only built for the x86_64 architecture, even though
more and more macOS devices run on aarch64.

I have written a few small patches to get this toolchain to build and run
on macOS/arm64. Should I submit them somewhere?

Thanks!

 - Romain


Re: [PATCH, rs6000] Disable gimple fold for float or double vec_minmax when fast-math is not set

2021-11-02 Thread David Edelsohn via Gcc-patches
On Mon, Nov 1, 2021 at 10:40 PM HAO CHEN GUI  wrote:
>
> David,
>
> My patch file was broken. I am sorry for it.  Here is the correct one. 
> Thanks a lot.
>
> ChangeLog
>
> 2021-11-01 Haochen Gui 
>
> gcc/
> * config/rs6000/rs6000-call.c (rs6000_gimple_fold_builtin): Disable
> gimple fold for VSX_BUILTIN_XVMINDP, ALTIVEC_BUILTIN_VMINFP,
> VSX_BUILTIN_XVMAXDP, ALTIVEC_BUILTIN_VMAXFP when fast-math is not
> set.
>
> gcc/testsuite/
> * gcc.target/powerpc/vec-minmax-1.c: New test.
> * gcc.target/powerpc/vec-minmax-2.c: Likewise.

This is okay.

The default DejaGNU test action is compile, but it's a good idea to
include the dg-do line to be clear and document the intention.

Thanks, David


[PATCH] middle-end/103038 - avoid ICE with -ftrivial-auto-var-init=pattern

2021-11-02 Thread Richard Biener via Gcc-patches
This avoids ICEing when expanding a VIEW_CONVERT_EXPR of an SSA name
on the LHS by making sure we can native-interpret OFFSET_TYPE and
by never building such an LHS but instead view-converting the RHS
when the LHS is an SSA name.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2021-11-02  Richard Biener  

PR middle-end/103038
* fold-const.c (native_interpret_expr): Handle OFFSET_TYPE.
(can_native_interpret_type_p): Likewise.
* internal-fn.c (expand_DEFERRED_INIT): View-convert the
RHS if the LHS is an SSA name.

* g++.dg/pr103038.C: New testcase.
---
 gcc/fold-const.c| 2 ++
 gcc/internal-fn.c   | 8 ++--
 gcc/testsuite/g++.dg/pr103038.C | 5 +
 3 files changed, 13 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/pr103038.C

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 54f91f0149c..2d3ba07e541 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -8791,6 +8791,7 @@ native_interpret_expr (tree type, const unsigned char 
*ptr, int len)
 case BOOLEAN_TYPE:
 case POINTER_TYPE:
 case REFERENCE_TYPE:
+case OFFSET_TYPE:
   return native_interpret_int (type, ptr, len);
 
 case REAL_TYPE:
@@ -8827,6 +8828,7 @@ can_native_interpret_type_p (tree type)
 case REAL_TYPE:
 case COMPLEX_TYPE:
 case VECTOR_TYPE:
+case OFFSET_TYPE:
   return true;
 default:
   return false;
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 9e10da0ad5c..fd6cb0995d9 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -3090,8 +3090,12 @@ expand_DEFERRED_INIT (internal_fn, gcall *stmt)
 (total_bytes * BITS_PER_UNIT, 1);
  wide_int w = wi::from_buffer (buf, total_bytes);
  init = wide_int_to_tree (itype, w);
- /* Pun the LHS to make sure its type has constant size.  */
- lhs = build1 (VIEW_CONVERT_EXPR, itype, lhs);
+ /* Pun the LHS to make sure its type has constant size
+unless it is an SSA name where that's already known.  */
+ if (TREE_CODE (lhs) != SSA_NAME)
+   lhs = build1 (VIEW_CONVERT_EXPR, itype, lhs);
+ else
+   init = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (lhs), init);
}
}
   else
diff --git a/gcc/testsuite/g++.dg/pr103038.C b/gcc/testsuite/g++.dg/pr103038.C
new file mode 100644
index 000..bb7183a460e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr103038.C
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-ftrivial-auto-var-init=pattern" } */
+
+struct S;
+void test() { int(S::*PtrMem); }
-- 
2.31.1


Re: [PATCH] Check number of iterations for test cases pr101145

2021-11-02 Thread Richard Biener via Gcc-patches
On Mon, 1 Nov 2021, Jiufu Guo wrote:

> PR101145 adds support for calculating the number of iterations
> for the 'until wrap' condition.  The current test cases check whether
> the loop can be vectorized; if a loop can be vectorized then the number
> of iterations is known.  It would be better to check the loop's
> number of iterations directly.  This patch updates the test cases
> accordingly.
> 
> Bootstrap and regtest pass on ppc,ppc64le and x86_64.
> Is this ok for trunk?

Not sure - the motivation was to make the loop vectorizable so
a vectorized check is strictly more powerful.  What's the problem
with the existing test?

Richard.

> BR,
> Jiufu
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/vect/pr101145_1.c: Update case.
>   * gcc.dg/vect/pr101145_2.c: Update case.
>   * gcc.dg/vect/pr101145_3.c: Update case.
> 
> ---
>  gcc/testsuite/gcc.dg/vect/pr101145_1.c | 2 +-
>  gcc/testsuite/gcc.dg/vect/pr101145_2.c | 2 +-
>  gcc/testsuite/gcc.dg/vect/pr101145_3.c | 2 +-
>  3 files changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c 
> b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
> index 9332b2c4257..13a89fa6863 100644
> --- a/gcc/testsuite/gcc.dg/vect/pr101145_1.c
> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
> @@ -10,4 +10,4 @@
>  
>  #include "pr101145.inc"
>  
> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
> +/* { dg-final { scan-tree-dump-times "Symbolic number of iterations is" 2 
> "vect" } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_2.c 
> b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
> index fa2c6be689a..5265491b98d 100644
> --- a/gcc/testsuite/gcc.dg/vect/pr101145_2.c
> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
> @@ -10,4 +10,4 @@
>  
>  #include "pr101145.inc"
>  
> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
> +/* { dg-final { scan-tree-dump-times "Symbolic number of iterations is" 2 
> "vect" } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c 
> b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> index 9f43c82593f..ffda26cf0bc 100644
> --- a/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
> @@ -10,4 +10,4 @@
>  
>  #include "pr101145.inc"
>  
> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
> +/* { dg-final { scan-tree-dump-times "Symbolic number of iterations is" 2 
> "vect" } } */
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Ivo Totev; HRB 36809 (AG Nuernberg)


Re: Add static_chain support to ipa-modref

2021-11-02 Thread Richard Biener via Gcc-patches
On Mon, 1 Nov 2021, Jan Hubicka wrote:

> Hi,
> this patch teaches ipa-modref about the static chain which is, like
> retslot, a hidden argument.  The patch is pretty much symmetric to what
> was done for retslot handling and I verified it does the intended job
> for Ada LTO bootstrap.
> 
> Bootstrapped/regtested x86_64-linux, OK?

OK.

Thanks,
Richard.

> Honza
> 
> gcc/ChangeLog:
> 
>   * gimple.c (gimple_call_static_chain_flags): New function.
>   * gimple.h (gimple_call_static_chain_flags): Declare
>   * ipa-modref.c (modref_summary::modref_summary): Initialize
>   static_chain_flags.
>   (modref_summary_lto::modref_summary_lto): Likewise.
>   (modref_summary::useful_p): Test static_chain_flags.
>   (modref_summary_lto::useful_p): Likewise.
>   (struct modref_summary_lto): Add static_chain_flags.
>   (modref_summary::dump): Dump static_chain_flags.
>   (modref_summary_lto::dump): Likewise.
>   (struct escape_point): Add static_cahin_arg.
>   (analyze_ssa_name_flags): Use gimple_call_static_chain_flags.
>   (analyze_parms): Handle static chains.
>   (modref_summaries::duplicate): Duplicate static_chain_flags.
>   (modref_summaries_lto::duplicate): Likewise.
>   (modref_write): Stream static_chain_flags.
>   (read_section): Likewise.
>   (modref_merge_call_site_flags): Handle static_chain_flags.
>   * ipa-modref.h (struct modref_summary): Add static_chain_flags.
>   * tree-ssa-structalias.c (handle_rhs_call): Use
>   gimple_call_static_chain_flags.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/ipa/modref-3.c: New test.
> 
> diff --git a/gcc/gimple.c b/gcc/gimple.c
> index 22dd6417d19..ef07d9385c5 100644
> --- a/gcc/gimple.c
> +++ b/gcc/gimple.c
> @@ -1647,6 +1647,33 @@ gimple_call_retslot_flags (const gcall *stmt)
>return flags;
>  }
>  
> +/* Detects argument flags for static chain on call STMT.  */
> +
> +int
> +gimple_call_static_chain_flags (const gcall *stmt)
> +{
> +  int flags = 0;
> +
> +  tree callee = gimple_call_fndecl (stmt);
> +  if (callee)
> +{
> +  cgraph_node *node = cgraph_node::get (callee);
> +  modref_summary *summary = node ? get_modref_function_summary (node)
> + : NULL;
> +
> +  if (summary)
> + {
> +   int modref_flags = summary->static_chain_flags;
> +
> +   /* We have possibly optimized out load.  Be conservative here.  */
> +   gcc_checking_assert (node->binds_to_current_def_p ());
> +   if (dbg_cnt (ipa_mod_ref_pta))
> + flags |= modref_flags;
> + }
> +}
> +  return flags;
> +}
> +
>  /* Detects return flags for the call STMT.  */
>  
>  int
> diff --git a/gcc/gimple.h b/gcc/gimple.h
> index 23a124ec769..3cde3cde7fe 100644
> --- a/gcc/gimple.h
> +++ b/gcc/gimple.h
> @@ -1590,6 +1590,7 @@ bool gimple_call_same_target_p (const gimple *, const 
> gimple *);
>  int gimple_call_flags (const gimple *);
>  int gimple_call_arg_flags (const gcall *, unsigned);
>  int gimple_call_retslot_flags (const gcall *);
> +int gimple_call_static_chain_flags (const gcall *);
>  int gimple_call_return_flags (const gcall *);
>  bool gimple_call_nonnull_result_p (gcall *);
>  tree gimple_call_nonnull_arg (gcall *);
> diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c
> index d866d9ed6b3..ae8ed53b396 100644
> --- a/gcc/ipa-modref.c
> +++ b/gcc/ipa-modref.c
> @@ -270,7 +270,8 @@ static GTY(()) fast_function_summary  *, va_gc>
>  /* Summary for a single function which this pass produces.  */
>  
>  modref_summary::modref_summary ()
> -  : loads (NULL), stores (NULL), retslot_flags (0), writes_errno (false)
> +  : loads (NULL), stores (NULL), retslot_flags (0), static_chain_flags (0),
> +writes_errno (false)
>  {
>  }
>  
> @@ -325,6 +326,9 @@ modref_summary::useful_p (int ecf_flags, bool check_flags)
>arg_flags.release ();
>if (check_flags && remove_useless_eaf_flags (retslot_flags, ecf_flags, 
> false))
>  return true;
> +  if (check_flags
> +  && remove_useless_eaf_flags (static_chain_flags, ecf_flags, false))
> +return true;
>if (ecf_flags & ECF_CONST)
>  return false;
>if (loads && !loads->every_base)
> @@ -367,6 +371,7 @@ struct GTY(()) modref_summary_lto
>modref_records_lto *stores;
>auto_vec GTY((skip)) arg_flags;
>eaf_flags_t retslot_flags;
> +  eaf_flags_t static_chain_flags;
>bool writes_errno;
>  
>modref_summary_lto ();
> @@ -378,7 +383,8 @@ struct GTY(()) modref_summary_lto
>  /* Summary for a single function which this pass produces.  */
>  
>  modref_summary_lto::modref_summary_lto ()
> -  : loads (NULL), stores (NULL), retslot_flags (0), writes_errno (false)
> +  : loads (NULL), stores (NULL), retslot_flags (0), static_chain_flags (0),
> +writes_errno (false)
>  {
>  }
>  
> @@ -406,6 +412,9 @@ modref_summary_lto::useful_p (int ecf_flags, bool 
> check_flags)
>arg_flags.release ();
>if (check_flags && remove_useless_eaf_flags (retslot_flags, ecf_flags, 

Re: [PATCH]middle-end Fix PR103007, add missing check on complex fms detection.

2021-11-02 Thread Richard Biener via Gcc-patches
On Mon, 1 Nov 2021, Tamar Christina wrote:

> Hi All,
> 
> The complex FMS detection is missing a check on if the nodes of the VEC_PERM
> has the amount of children we expect before it recurses.
> 
> This check is there on MUL and FMA but was missing for FMS, due to this the
> compiler goes on further than it should and hits an assert.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
> and no issues.
> 
> Ok for master?

OK.

Thanks,
Richard.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   PR tree-optimization/103007
>   * tree-vect-slp-patterns.c (complex_fms_pattern::matches): Add elem
>   check.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR tree-optimization/103007
>   * g++.dg/pr103007.C: New test.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/testsuite/g++.dg/pr103007.C b/gcc/testsuite/g++.dg/pr103007.C
> new file mode 100644
> index 
> ..1631a85080039f29b83c97d2f62c66be9eac109f
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/pr103007.C
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O3" } */
> +
> +typedef float MushMeshVector[4];
> +struct MushMeshQuaternionPair {
> +  void VectorRotate(MushMeshVector &);
> +  MushMeshVector m_first;
> +  MushMeshVector m_second;
> +};
> +void 
> +MushMeshQuaternionPair::
> +VectorRotate(MushMeshVector &ioVec)  {
> +  ioVec[2] = (2 - m_first[1] + m_first[3] * 0);
> +  ioVec[3] = (m_first[3] + m_first[1] - m_first[2] * 0);
> +  float c = ioVec[2], d = ioVec[3];
> +  ioVec[2] = (0 - d * m_second[1]);
> +  ioVec[3] = (2 - c * m_second[1]);
> +}
> +
> diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c
> index 
> 6b37e9bac6f3f86a51d1a532a4c570a37af76eac..5e64a9bc417ab6b855e8791fd482dba23287f467
>  100644
> --- a/gcc/tree-vect-slp-patterns.c
> +++ b/gcc/tree-vect-slp-patterns.c
> @@ -1250,13 +1250,17 @@ complex_fms_pattern::matches (complex_operation_t op,
>  
>auto childs = SLP_TREE_CHILDREN (nodes[0]);
>auto l0node = SLP_TREE_CHILDREN (childs[0]);
> -  auto l1node = SLP_TREE_CHILDREN (childs[1]);
>  
>/* Now operand2+4 may lead to another expression.  */
>auto_vec left_op, right_op;
>left_op.safe_splice (SLP_TREE_CHILDREN (l0node[1]));
>right_op.safe_splice (SLP_TREE_CHILDREN (nodes[1]));
>  
> +  /* If these nodes don't have any children then they're
> + not ones we're interested in.  */
> +  if (left_op.length () != 2 || right_op.length () != 2)
> +return IFN_LAST;
> +
>bool is_neg = vect_normalize_conj_loc (left_op);
>  
>bool conj_first_operand = false;
> 
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Ivo Totev; HRB 36809 (AG Nuernberg)


Re: [PATCH] PR fortran/91497 -- Silence conversion warnings for MIN1 and MAX1

2021-11-02 Thread Thomas Koenig via Gcc-patches

Hi Manfred,


> In addition to the patches of Steve Kargl for PR 91497:
> The MIN1 and MAX1 intrinsics do explicit type conversions and should
> be silenced too for -Wconversion and -Wconversion-extra.
>
> Adjust testcase to only use *4 and *8 real types, provide a second
> testcase for *10 and *16 precisions.

Two points:

We should modify existing test cases only when necessary, because
modification can impede a regression test.  It is better to create
a new one.

While we do recognize real*4 and real*8 and so on, these are
non-standard extensions, and I would like to avoid having these
in new test cases.

Instead of real*8, you can use real(8) or double precision.

OK with these changes to the test cases.

Thanks for the patch!

Best regards

Thomas


Re: [PATCH] Add debug counters to back threader.

2021-11-02 Thread Richard Biener via Gcc-patches
On Mon, Nov 1, 2021 at 2:03 PM Jeff Law via Gcc-patches
 wrote:
>
>
>
> On 11/1/2021 3:54 AM, Aldy Hernandez wrote:
> > Chasing down stage3 miscomparisons is never fun, and having no way to
> > distinguish between jump threads registered by a particular
> > pass, is even harder.  This patch adds debug counters for the individual
> > back threading passes.  I've left the ethread pass alone, as that one is
> > usually benign, but we could easily add it if needed.
> >
> > The fact that we can only pass one boolean argument to the passes
> > infrastructure has us do all sorts of gymnastics to differentiate
> > between the various back threading passes.
> >
> > Tested on x86-64 Linux.
> >
> > OK?
> >
> > gcc/ChangeLog:
> >
> >   * dbgcnt.def: Add debug counter for back_thread[12] and
> >   back_threadfull[12].
> >   * passes.def: Pass "first" argument to each back threading pass.
> >   * tree-ssa-threadbackward.c (back_threader::back_threader): Add
> >   first argument.
> >   (back_threader::debug_counter): New.
> >   (back_threader::maybe_register_path): Call debug_counter.
> OK

But it's ugly.  Very.  Why isn't a single debug-counter good enough?
You should be able to reduce to a single threading pass via
-fdisable-tree-xyz and then bisect with the debug counter.

Alternatively at least store the debug counter to query somewhere
so you can have the "hot" path query a single one.  So instead of

if (!dbg_cnt (back_thread1))

do

if (!dbg_cnt (curr_cnt))

and compute curr_cnt somewhere.
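
Something along these lines, purely as a sketch on top of dbgcnt.h, with
invented member and function names (the real back_threader has a different
constructor and more state):

/* Hypothetical sketch, not the actual patch: remember at construction time
   which counter this pass instance should query, so the hot path tests
   only a single counter.  */
class back_threader
{
  enum debug_counter m_counter;
public:
  back_threader (bool first, bool full)
    : m_counter (full ? (first ? back_threadfull1 : back_threadfull2)
		      : (first ? back_thread1 : back_thread2)) {}
  bool counter_allows_p () { return dbg_cnt (m_counter); }
};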

Richard.

> jeff
>


Re: [PATCH] gcc: implement AIX-style constructors

2021-11-02 Thread CHIGOT, CLEMENT via Gcc-patches
Hi David,

Here is the new version of the patch.
I've moved the startup function in crtcdtors files.

I'm just wondering if the part dealing with the
__init_aix_libgcc_cxa_atexit is needed. I'm adding it because
the destructor created in crtcxa.o is following GCC format and
thus won't be launched if the flag "-mcdtors=aix" is passed.
However, as you said, this option might not operate correctly
if the GCC runtime isn't rebuild with it.

Thanks,
Clément
From 8a14b0eb312628ad9cce8ac9f439c420b12b33c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Chigot?= 
Date: Mon, 4 Oct 2021 09:24:43 +0200
Subject: [PATCH] gcc: implement AIX-style constructors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The AIX linker now supports constructor and destructor detection.  For such
functions to be detected, their names must start with __sinit or __sterm,
and -bcdtors must be passed to linker calls.  The linker will then create a
"_cdtors" symbol which can be used to launch the initialization.

This patch creates a new RS6000 flag "-mcdtors=".
With "-mcdtors=aix", gcc will generate these new constructors/destructors.
With "-mcdtors=gcc", which is currently the default, gcc will continue
to generate "gcc" format for constructors (ie _GLOBAL__I and _GLOBAL__D
symbols).
Ideally, it would have been better to enable the AIX format by default
instead of using collect2. However, the compatibility between the
previously-built binaries and the new ones is too complex to be done.

gcc/ChangeLog:
2021-10-04  Clément Chigot  

	* collect2.c (aixbcdtors_flags): New variable.
	(main): Use it to detect -bcdtors and remove -binitfini flag.
	(write_c_file_stat): Adapt to new AIX format.
	* config/rs6000/aix.h (FILE_SINIT_FORMAT): New define.
	(FILE_STERM_FORMAT): New define.
	(TARGET_FILE_FUNCTION_FORMAT): New define.
	* config/rs6000/aix64.opt: Add -mcdtors flag.
	* config/rs6000/aix71.h (LINK_SPEC_COMMON): Pass -bcdtors when
	  -mcdtors=aix is passed.
	(STARTFILE_SPEC): Add crtcdtors.o with -mcdtors=aix.
	* config/rs6000/aix72.h (LINK_SPEC_COMMON): Likewise.
	(STARTFILE_SPEC): Likewise.
	* config/rs6000/aix73.h (LINK_SPEC_COMMON): Likewise.
	(STARTFILE_SPEC): Likewise.
	* config/rs6000/rs6000-opts.h (enum rs6000_cdtors): New enum.
	* doc/invoke.texi: Add -mcdtors flag.
	* tree.c (get_file_function_name): Add
	  TARGET_FILE_FUNCTION_FORMAT support.

libgcc/ChangeLog:

	* config.host: Add crtcdtors.o files.
	* config/rs6000/t-aix-cxa: Likewise.
	* config/rs6000/crtcdtors.c: New file.

gcc/testsuite/ChangeLog:
2021-10-04  Clément Chigot  

	* gcc.target/powerpc/constructor-aix.c: New test.
---
 gcc/collect2.c| 63 ---
 gcc/config/rs6000/aix.h   | 56 +
 gcc/config/rs6000/aix64.opt   | 17 +
 gcc/config/rs6000/aix71.h | 10 ++-
 gcc/config/rs6000/aix72.h | 10 ++-
 gcc/config/rs6000/aix73.h | 10 ++-
 gcc/config/rs6000/rs6000-opts.h   |  8 +++
 gcc/doc/invoke.texi   | 21 ++-
 .../gcc.target/powerpc/constructor-aix.c  | 12 
 gcc/tree.c|  5 ++
 libgcc/config.host|  2 +-
 libgcc/config/rs6000/crtcdtors.c  | 53 
 libgcc/config/rs6000/t-aix-cxa| 12 
 13 files changed, 260 insertions(+), 19 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/constructor-aix.c
 create mode 100644 libgcc/config/rs6000/crtcdtors.c

diff --git a/gcc/collect2.c b/gcc/collect2.c
index 33114322f01..3d04bc8465f 100644
--- a/gcc/collect2.c
+++ b/gcc/collect2.c
@@ -186,6 +186,7 @@ static int aix64_flag;			/* true if -b64 */
 static int aixrtl_flag;			/* true if -brtl */
 static int aixlazy_flag;		/* true if -blazy */
 static int visibility_flag;		/* true if -fvisibility */
+static int aixbcdtors_flag;/* True if -bcdtors */
 #endif
 
 enum lto_mode_d {
@@ -984,6 +985,8 @@ main (int argc, char **argv)
 	  aixrtl_flag = 0;
 	else if (strcmp (argv[i], "-blazy") == 0)
 	  aixlazy_flag = 1;
+	else if (strcmp (argv[i], "-bcdtors") == 0)
+	  aixbcdtors_flag = 1;
 #endif
   }
 
@@ -1731,7 +1734,9 @@ main (int argc, char **argv)
   /* Tell the linker that we have initializer and finalizer functions.  */
 #ifdef LD_INIT_SWITCH
 #ifdef COLLECT_EXPORT_LIST
-  *ld2++ = concat (LD_INIT_SWITCH, ":", initname, ":", fininame, NULL);
+  /* Do not emit -binitfini when -bcdtors is enabled. */
+  if (!aixbcdtors_flag)
+*ld2++ = concat (LD_INIT_SWITCH, ":", initname, ":", fininame, NULL);
 #else
   *ld2++ = LD_INIT_SWITCH;
   *ld2++ = initname;
@@ -2020,6 +2025,7 @@ write_c_file_stat (FILE *stream, const char *name ATTRIBUTE_UNUSED)
 {
   const char *p, *q;
   char *prefix, *r;
+  char *regframe_name, *deregframe_name;
   int frames = (frame_tables.number > 0);
 
   /* Figure out name of output_file, stripping off

Re: [PATCH]middle-end testsuite: fix failing complex add testcases PR103000

2021-11-02 Thread Richard Biener via Gcc-patches
On Mon, 1 Nov 2021, Tamar Christina wrote:

> Hi All,
> 
> Some targets have overridden the default unroll factor and so do not have 
> enough
> data to succeed for SLP vectorization if loop vect is turned off.
> 
> To fix this just always unroll in these testcases.

Another option is to place

#pragma GCC unroll N

before the loop you need to have unrolled N times.
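
For instance (illustrative only, not taken from the testcases):

/* Illustrative only: the pragma requests that the following loop be
   unrolled 16 times, so BB SLP still sees enough statements even with
   -fno-tree-loop-vectorize.  */
void
add_arrays (float *c, float *a, float *b)
{
#pragma GCC unroll 16
  for (int i = 0; i < 16; i++)
    c[i] = a[i] + b[i];
}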

> Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
> and no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/testsuite/ChangeLog:
> 
>   PR testsuite/103000
>   * gcc.dg/vect/complex/fast-math-bb-slp-complex-add-double.c:
>   Force unroll.
>   * gcc.dg/vect/complex/fast-math-bb-slp-complex-add-float.c: likewise
>   * gcc.dg/vect/complex/fast-math-bb-slp-complex-add-pattern-float.c:
>   Likewise
>   * gcc.dg/vect/complex/fast-math-bb-slp-complex-add-pattern-half-float.c:
>   Likewise.
> 
> --- inline copy of patch -- 
> diff --git 
> a/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-add-double.c 
> b/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-add-double.c
> index 
> 4445119fc9d2c7dafe6abb5f7fb741c7794144a2..23f179a55dcf77c7cfa8f55f748c9973b5e9c646
>  100644
> --- a/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-add-double.c
> +++ b/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-add-double.c
> @@ -1,6 +1,7 @@
>  /* { dg-do compile } */
> +/* { dg-require-effective-target vect_double } */
>  /* { dg-add-options arm_v8_3a_complex_neon } */
> -/* { dg-additional-options "-fno-tree-loop-vectorize" } */
> +/* { dg-additional-options "-fno-tree-loop-vectorize -funroll-loops" } */
>  /* { dg-add-options arm_v8_1m_mve_fp } */
>  
>  #define TYPE double
> diff --git 
> a/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-add-float.c 
> b/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-add-float.c
> index 
> ff53719d1a895a7161ebcc6fba4903fc3de9095f..cc7715160981274605b4ab21e7db33fdb373e04d
>  100644
> --- a/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-add-float.c
> +++ b/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-add-float.c
> @@ -1,6 +1,7 @@
>  /* { dg-do compile } */
> +/* { dg-require-effective-target vect_float } */
>  /* { dg-add-options arm_v8_3a_complex_neon } */
> -/* { dg-additional-options "-fno-tree-loop-vectorize" } */
> +/* { dg-additional-options "-fno-tree-loop-vectorize -funroll-loops" } */
>  /* { dg-add-options arm_v8_1m_mve_fp } */
>  
>  #define TYPE float
> diff --git 
> a/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-add-pattern-float.c
>  
> b/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-add-pattern-float.c
> index 
> 8bc7117565e79a0e93a22d2b28a32e9c5ddfe4d3..fb6a1676fb4b7a766088dcec42a3a2465c3e11f9
>  100644
> --- 
> a/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-add-pattern-float.c
> +++ 
> b/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-add-pattern-float.c
> @@ -1,6 +1,7 @@
>  /* { dg-do compile } */
> +/* { dg-require-effective-target vect_float } */
>  /* { dg-add-options arm_v8_3a_complex_neon } */
> -/* { dg-additional-options "-fno-tree-loop-vectorize" } */
> +/* { dg-additional-options "-fno-tree-loop-vectorize -funroll-loops" } */
>  /* { dg-add-options arm_v8_1m_mve_fp } */
>  
>  #define TYPE float
> diff --git 
> a/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-add-pattern-half-float.c
>  
> b/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-add-pattern-half-float.c
> index 
> 80e0f5d5412318d05883813a81dc4a2d9a62f234..4bb106a3d520c6ab2a322cc463f6a7f5c5238f95
>  100644
> --- 
> a/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-add-pattern-half-float.c
> +++ 
> b/gcc/testsuite/gcc.dg/vect/complex/fast-math-bb-slp-complex-add-pattern-half-float.c
> @@ -1,6 +1,7 @@
>  /* { dg-do compile } */
> +/* { dg-require-effective-target vect_complex_add_half } */
>  /* { dg-add-options arm_v8_3a_fp16_complex_neon } */
> -/* { dg-additional-options "-fno-tree-loop-vectorize" } */
> +/* { dg-additional-options "-fno-tree-loop-vectorize -funroll-loops" } */
>  /* { dg-add-options arm_v8_1m_mve_fp } */
>  
>  #define TYPE _Float16
> @@ -8,6 +9,6 @@
>  #include "complex-add-pattern-template.c"
>  
>  /* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "slp1" { 
> target { vect_complex_add_half } } } } */
> -/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" { 
> target { vect_complex_add_byte } && ! target { arm*-*-* } } } } */
> +/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" { 
> target { vect_complex_add_half } && ! target { arm*-*-* } } } } */
>  /* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT270" "slp1" { xfail 
> *-*-* } } } */
>  /* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT90" "slp1" } } */
> 
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Ivo Totev

Re: [PATCH] Add debug counters to back threader.

2021-11-02 Thread Aldy Hernandez via Gcc-patches
On Tue, Nov 2, 2021 at 2:27 PM Richard Biener
 wrote:
>
> On Mon, Nov 1, 2021 at 2:03 PM Jeff Law via Gcc-patches
>  wrote:
> >
> >
> >
> > On 11/1/2021 3:54 AM, Aldy Hernandez wrote:
> > > Chasing down stage3 miscomparisons is never fun, and having no way to
> > > distinguish between jump threads registered by a particular
> > > pass, is even harder.  This patch adds debug counters for the individual
> > > back threading passes.  I've left the ethread pass alone, as that one is
> > > usually benign, but we could easily add it if needed.
> > >
> > > The fact that we can only pass one boolean argument to the passes
> > > infrastructure has us do all sorts of gymnastics to differentiate
> > > between the various back threading passes.
> > >
> > > Tested on x86-64 Linux.
> > >
> > > OK?
> > >
> > > gcc/ChangeLog:
> > >
> > >   * dbgcnt.def: Add debug counter for back_thread[12] and
> > >   back_threadfull[12].
> > >   * passes.def: Pass "first" argument to each back threading pass.
> > >   * tree-ssa-threadbackward.c (back_threader::back_threader): Add
> > >   first argument.
> > >   (back_threader::debug_counter): New.
> > >   (back_threader::maybe_register_path): Call debug_counter.
> > OK
>
> But it's ugly.  Very.  Why isn't a single debug-counter good enough?
> You should be able to reduce to a single threading pass via
> -fdisable-tree-xyz and then bisect with the debug counter.

Indeed.  I'm not a big fan either.

The -fdisable-tree-xyz approach is my first line of defense, but
sometimes the problem is a combination of threading passes working in
tandem.  For example, thread1 threads a path that causes a later
thread99 pass to find another path.  So I can't just have one counter.
We need to be able to bisect the thread1 path, and then, if there's
still a problem, bisect the thread99 pass.

I was fighting a bootstrap miscomparison bug when I could reduce the
problem to 2 threading passes, and then further to thread1's 123 path,
and thread2's 567 and 890 paths.  Not fun.

Aldy

>
> Alternatively at least store the debug counter to query somewhere
> so you can have the "hot" path query a single one.  So instead of
>
> if (!dbg_cnt (back_thread1))
>
> do
>
> if (!dbg_cnt (curr_cnt))
>
> and compute curr_cnt somewhere.
>
> Richard.
>
> > jeff
> >
>



Re: redundant bitmap_bit_p followed by bitmap_clear_bit [was: Re: [COMMITTED] Kill second order relations in the path solver.]

2021-11-02 Thread Richard Biener via Gcc-patches
On Mon, Nov 1, 2021 at 10:02 PM Bernhard Reutner-Fischer via
Gcc-patches  wrote:
>
> On Mon, 1 Nov 2021 15:21:03 +0100
> Aldy Hernandez  wrote:
>
> > I'm not convinced this makes the code clearer to read, especially if
> > it's not on a critical path.  But if you feel strongly, please submit
> > a patch ;-).
>
> No i don't feel strongly about it.
> Compiling e.g. -O2 ira.o
> # Overhead   Samples  Command  Shared Object  Symbol
> #     ...  .  .
> #
>100.00%  4197  cc1plus  cc1plus[.] mark_reachable_blocks
>100.00% 22000  cc1plus  cc1plus[.] path_oracle::killing_def
> and the mark_elimination is reload.
> So it's not just a handful of calls saved but some. And it would be
> smaller code as it saves a call. Well maybe another day.

Note that single bit set/clear are already implemented as test && set/clear.
Note that unfortunately the sbitmap bitmap_set/clear_bit overloads do not
return the previous state of the bit.  Maybe providing
bitmap_test_and_set_bit () and bitmap_test_and_clear_bit () would be
better (but note we currently return true when the bit changed, not when
it was set).
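
A rough sketch of one of those helpers on top of the existing primitives
(hypothetical, not an existing GCC entry point):

/* Hypothetical sketch only: return the previous state of BIT in MAP and
   clear it, built on the existing bitmap_bit_p/bitmap_clear_bit
   primitives.  */
static inline bool
bitmap_test_and_clear_bit (bitmap map, int bit)
{
  bool was_set = bitmap_bit_p (map, bit);
  if (was_set)
    bitmap_clear_bit (map, bit);
  return was_set;
}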

Richard.

> thanks,
> >
> > Aldy
> >
> > On Mon, Nov 1, 2021 at 3:10 PM Bernhard Reutner-Fischer
> >  wrote:
> > >
> > > On Thu, 28 Oct 2021 01:55:30 +0200
> > > Bernhard Reutner-Fischer  wrote:
> > >
> > > > On Wed, 27 Oct 2021 20:13:21 +0200
> > > > Aldy Hernandez via Gcc-patches  wrote:
> > >
> > > > > @@ -1306,6 +1307,24 @@ path_oracle::killing_def (tree ssa)
> > > > >ptr->m_next = m_equiv.m_next;
> > > > >m_equiv.m_next = ptr;
> > > > >bitmap_ior_into (m_equiv.m_names, b);
> > > > > +
> > > > > +  // Walk the relation list an remove SSA from any relations.
> > > >
> > > > s/an /and /
> > > >
> > > > > +  if (!bitmap_bit_p (m_relations.m_names, v))
> > > > > +return;
> > > > > +
> > > > > +  bitmap_clear_bit (m_relations.m_names, v);
> > > >
> > > > IIRC bitmap_clear_bit returns true if the bit was set, false otherwise,
> > > > so should be used as if(!bitmap_clear_bit) above.
> > >
> > > > > +  relation_chain **prev = &(m_relations.m_head);
> > > >
> > > > s/[()]//
> > > > thanks,
> > >
> > > There seems to be two other spots where a redundant bitmap_bit_p checks
> > > if we want to bitmap_clear_bit. In dse and ira.
> > > Like:
> > > $ cat ~/coccinelle/gcc_bitmap_bit_p-0.cocci ; echo EOF
> > > // replace redundant bitmap_bit_p() bitmap_clear_bit with the latter
> > > @ rule1 @
> > > identifier fn;
> > > expression bitmap, bit;
> > > @@
> > >
> > > fn(...) {
> > > <...
> > > (
> > > -if (bitmap_bit_p (bitmap, bit))
> > > +if (bitmap_clear_bit (bitmap, bit))
> > > {
> > >   ...
> > > -  bitmap_clear_bit (bitmap, bit);
> > >   ...
> > > }
> > > |
> > > -if (bitmap_bit_p (bitmap, bit))
> > > +if (bitmap_clear_bit (bitmap, bit))
> > > {
> > >   ...
> > > }
> > > ...
> > > -bitmap_clear_bit (bitmap, bit);
> > > )
> > > ...>
> > > }
> > > EOF
> > > $ find gcc/ -type f -a \( -name "*.c" -o -name "*.cc" \) -a \( ! -path 
> > > "gcc/testsuite/*" -a ! -path "gcc/contrib/*" \) -exec spatch -sp_file 
> > > ~/coccinelle/gcc_bitmap_bit_p-0.cocci --show-diff {} \;
> > > diff =
> > > --- gcc/dse.c
> > > +++ /tmp/cocci-output-1104419-443759-dse.c
> > > @@ -3238,9 +3238,8 @@ mark_reachable_blocks (sbitmap unreachab
> > >edge e;
> > >edge_iterator ei;
> > >
> > > -  if (bitmap_bit_p (unreachable_blocks, bb->index))
> > > +  if (bitmap_clear_bit(unreachable_blocks, bb->index))
> > >  {
> > > -  bitmap_clear_bit (unreachable_blocks, bb->index);
> > >FOR_EACH_EDGE (e, ei, bb->preds)
> > > {
> > >   mark_reachable_blocks (unreachable_blocks, e->src);
> > > diff =
> > > --- gcc/ira.c
> > > +++ /tmp/cocci-output-1104678-d8679a-ira.c
> > > @@ -2944,17 +2944,15 @@ mark_elimination (int from, int to)
> > >FOR_EACH_BB_FN (bb, cfun)
> > >  {
> > >r = DF_LR_IN (bb);
> > > -  if (bitmap_bit_p (r, from))
> > > +  if (bitmap_clear_bit(r, from))
> > > {
> > > - bitmap_clear_bit (r, from);
> > >   bitmap_set_bit (r, to);
> > > }
> > >if (! df_live)
> > >  continue;
> > >r = DF_LIVE_IN (bb);
> > > -  if (bitmap_bit_p (r, from))
> > > +  if (bitmap_clear_bit(r, from))
> > > {
> > > - bitmap_clear_bit (r, from);
> > >   bitmap_set_bit (r, to);
> > > }
> > >  }
> > > # in ira.c one would have to fixup the curly braces manually
> > > PS: coccinelle seems to ruin the spaces before braces in the '+' even
> > > though i have written them correctly according to GNU style..
> > >
> >
>


Re: Add EAF_NOT_RETURNED_DIRECTLY

2021-11-02 Thread Richard Biener via Gcc-patches
On Mon, 1 Nov 2021, Jan Hubicka wrote:

> Hi,
> this patch adds EAF_NOT_RETURNED_DIRECTLY which works similarly as
> EAF_NODIRECTESCAPE.  Values pointed to by a given argument may be returned but
> not the argument itself.  This helps PTA quite noticeably because we mostly
> care about tracking points to which given memory location can escape.
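
To make the distinction concrete in C terms (an illustration, not one of the
patch's testcases; the function names are made up):

  int *ret_arg (int *p)   { return p; }   /* p itself is returned directly */
  int *ret_load (int **p) { return *p; }  /* p is never returned, only a
                                             value loaded from it, which is
                                             the case the new flag can
                                             describe */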
> 
> I think this is about last reasonable improvement we can get to EAF
> flags.
> 
> cc1plus disambiguation counts change from:
> 
> Alias oracle query stats:
>   refs_may_alias_p: 77976088 disambiguations, 98744590 queries
>   ref_maybe_used_by_call_p: 572845 disambiguations, 79014622 queries
>   call_may_clobber_ref_p: 340823 disambiguations, 344823 queries
>   nonoverlapping_component_refs_p: 0 disambiguations, 26590 queries
>   nonoverlapping_refs_since_match_p: 31626 disambiguations, 65379 must 
> overlaps, 97963 queries
>   aliasing_component_refs_p: 57414 disambiguations, 11434878 queries
>   TBAA oracle: 27749649 disambiguations 91624184 queries
>14733408 are in alias set 0
>8847139 queries asked about the same object
>139 queries asked about the same alias set
>0 access volatile
>38412201 are dependent in the DAG
>1881648 are aritificially in conflict with void *
> 
> Modref stats:
>   modref use: 23785 disambiguations, 702425 queries
>   modref clobber: 2296391 disambiguations, 22690531 queries
>   5260226 tbaa queries (0.231825 per modref query)
>   731741 base compares (0.032249 per modref query)
> 
> PTA query stats:
>   pt_solution_includes: 12580233 disambiguations, 35854408 queries
>   pt_solutions_intersect: 1409041 disambiguations, 13496899 queries
> 
> To:
> 
> Alias oracle query stats:
>   refs_may_alias_p: 78304485 disambiguations, 98830913 queries
>   ref_maybe_used_by_call_p: 630360 disambiguations, 79308222 queries
>   call_may_clobber_ref_p: 381549 disambiguations, 384627 queries
>   nonoverlapping_component_refs_p: 0 disambiguations, 26299 queries
>   nonoverlapping_refs_since_match_p: 29919 disambiguations, 64917 must 
> overlaps, 95781 queries
>   aliasing_component_refs_p: 57250 disambiguations, 11336880 queries
>   TBAA oracle: 27835747 disambiguations 91534430 queries
>14884868 are in alias set 0
>8933627 queries asked about the same object
>123 queries asked about the same alias set
>0 access volatile
>37974723 are dependent in the DAG
>1905342 are aritificially in conflict with void *
> 
> Modref stats:
>   modref use: 24929 disambiguations, 756294 queries
>   modref clobber: 2334910 disambiguations, 23414495 queries
>   5359212 tbaa queries (0.228884 per modref query)
>   754642 base compares (0.032230 per modref query)
> 
> PTA query stats:
>   pt_solution_includes: 13262256 disambiguations, 36306509 queries
>   pt_solutions_intersect: 1574672 disambiguations, 13638933 queries
> 
> So about 5% more pt_solution_includes and 11% more pt_solutions_intersect
> disambiguations.
> 
> Bootstrapped/regtested x86_64-linux, OK?

OK.

Thanks,
Richard.

> Honza
> 
> gcc/ChangeLog:
> 
>   * tree-core.h (EAF_NOT_RETURNED_DIRECTLY): New flag.
>   (EAF_NOREAD): Renumber.
>   * ipa-modref.c (dump_eaf_flags): Dump EAF_NOT_RETURNED_DIRECTLY.
>   (remove_useless_eaf_flags): Handle EAF_NOT_RETURNED_DIRECTLY
>   (deref_flags): Likewise.
>   (modref_lattice::init): Likewise.
>   (modref_lattice::merge): Likewise.
>   (merge_call_lhs_flags): Likewise.
>   (analyze_ssa_name_flags): Likewise.
>   (modref_merge_call_site_flags): Likewise.
>   * tree-ssa-structalias.c (handle_call_arg): Likewise.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/ipa/modref-3.c: New test.
>   * gcc.dg/tree-ssa/modref-10.c: New test.
> 
> diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c
> index d866d9ed6b3..c0aae084dbd 100644
> --- a/gcc/ipa-modref.c
> +++ b/gcc/ipa-modref.c
> @@ -160,6 +160,8 @@ dump_eaf_flags (FILE *out, int flags, bool newline = true)
>  fprintf (out, " unused");
>if (flags & EAF_NOT_RETURNED)
>  fprintf (out, " not_returned");
> +  if (flags & EAF_NOT_RETURNED_DIRECTLY)
> +fprintf (out, " not_returned_directly");
>if (flags & EAF_NOREAD)
>  fprintf (out, " noread");
>if (newline)
> @@ -295,7 +297,7 @@ remove_useless_eaf_flags (int eaf_flags, int ecf_flags, 
> bool returns_void)
>else if (ecf_flags & ECF_PURE)
>  eaf_flags &= ~implicit_pure_eaf_flags;
>else if ((ecf_flags & ECF_NORETURN) || returns_void)
> -eaf_flags &= ~EAF_NOT_RETURNED;
> +eaf_flags &= ~(EAF_NOT_RETURNED | EAF_NOT_RETURNED_DIRECTLY);
>return eaf_flags;
>  }
>  
> @@ -1373,7 +1375,7 @@ memory_access_to (tree op, tree ssa_name)
>  static int
>  deref_flags (int flags, bool ignore_stores)
>  {
> -  int ret = EAF_NODIRECTESCAPE;
> +  int ret = EAF_NODIRECTESCAPE | EAF_NOT_RETURNED_DIRECTLY;
>/* If argu

RE: [PATCH 2/5]AArch64 sve: combine nested if predicates

2021-11-02 Thread Tamar Christina via Gcc-patches
Hi All,

Here’s a respin of the patch.

The following example

void f5(float * restrict z0, float * restrict z1, float *restrict x,
float * restrict y, float c, int n)
{
for (int i = 0; i < n; i++) {
float a = x[i];
float b = y[i];
if (a > b) {
z0[i] = a + b;
if (a > c) {
z1[i] = a - b;
}
}
}
}

generates currently:

ptrue   p3.b, all
ld1wz1.s, p1/z, [x2, x5, lsl 2]
ld1wz2.s, p1/z, [x3, x5, lsl 2]
fcmgt   p0.s, p3/z, z1.s, z0.s
fcmgt   p2.s, p1/z, z1.s, z2.s
fcmgt   p0.s, p0/z, z1.s, z2.s
and p0.b, p0/z, p1.b, p1.b

The conditions for a > b and a > c become separate comparisons.

After this patch we generate:

ld1wz1.s, p0/z, [x2, x5, lsl 2]
ld1wz2.s, p0/z, [x3, x5, lsl 2]
fcmgt   p1.s, p0/z, z1.s, z2.s
fcmgt   p1.s, p1/z, z1.s, z0.s

Where the conditions a > b && a > c are folded by using the predicate result of
the previous compare, thus allowing the removal of one of the compares.

Whenever a mask is being generated from a BIT_AND we mask the operands of
the AND instead and then just AND the result.

This allows us to CSE the masks and generate the right combination.
However, because re-assoc will try to re-order the masks in the & we now have to
perform a small local CSE on the vectorized loop if vectorization is successful.
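
In scalar terms the difference is roughly the following (illustration only;
the real code of course operates on vector masks and the function names are
made up):

#include <stdbool.h>

/* Before: the inner mask ANDs the unmasked conditions first, so the
   subexpression shared with mask1 is lost.  */
void masks_before (bool a_gt_b, bool a_gt_c, bool loop_mask,
                   bool *mask1, bool *mask2)
{
  *mask1 = a_gt_b & loop_mask;
  *mask2 = (a_gt_b & a_gt_c) & loop_mask;
}

/* After: each operand is masked first and the results are ANDed, so
   a_gt_b & loop_mask is literally mask1 and can be CSEd.  */
void masks_after (bool a_gt_b, bool a_gt_c, bool loop_mask,
                  bool *mask1, bool *mask2)
{
  *mask1 = a_gt_b & loop_mask;
  *mask2 = *mask1 & (a_gt_c & loop_mask);
}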

Note: This patch series is working incrementally towards generating the most
  efficient code for this and other loops in small steps.

Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-linux-gnu and no 
issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

* tree-vect-stmts.c (prepare_load_store_mask): When combining two masks
mask the operands instead of the combined operation.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sve/pred-combine-and.c: New test.

--- inline copy of patch ---

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c
new file mode 100644
index 
..ed7fb591ec69dbdafe27fc9aa08a0b0910c94003
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 --save-temps" } */
+
+void f5(float * restrict z0, float * restrict z1, float *restrict x, float * 
restrict y, float c, int n)
+{
+for (int i = 0; i < n; i++) {
+float a = x[i];
+float b = y[i];
+if (a > b) {
+z0[i] = a + b;
+if (a > c) {
+z1[i] = a - b;
+}
+}
+}
+}
+
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-9]+/z, 
z[0-9]+\.s, z[0-9]+\.s} 2 } } */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 
1f56e10709e8f27d768c04f7ef914e2cd9347c36..27ee48aea429810a3d907435a92b8fd1817d
 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -6302,10 +6302,39 @@ vectorizable_operation (vec_info *vinfo,
}
   else
{
+ /* When combining two masks check is either of them has already been
+combined with a loop mask, if that's the case we can mark that the
+new combined mask doesn't need to be combined with a loop mask.  */
+ if (masked_loop_p && code == BIT_AND_EXPR)
+   {
+ scalar_cond_masked_key cond1 (op0, ncopies);
+ if (loop_vinfo->scalar_cond_masked_set.contains (cond1))
+   {
+ tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
+ vectype, i);
+
+ vop0 = prepare_load_store_mask (TREE_TYPE (mask), mask, vop0, 
gsi);
+ scalar_cond_masked_key cond (scalar_dest, ncopies);
+ loop_vinfo->scalar_cond_masked_set.add (cond);
+   }
+
+ scalar_cond_masked_key cond2 (op1, ncopies);
+ if (loop_vinfo->scalar_cond_masked_set.contains (cond2))
+   {
+ tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
+ vectype, i);
+
+ vop1 = prepare_load_store_mask (TREE_TYPE (mask), mask, vop1, 
gsi);
+ scalar_cond_masked_key cond (scalar_dest, ncopies);
+ loop_vinfo->scalar_cond_masked_set.add (cond);
+   }
+   }
+
  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
  new_temp = make_ssa_name (vec_dest, new_stmt);
  gimple_assign_set_lhs (new_stmt, new_temp);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
+
  if (vec_cvt_dest)
{
  new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);


rb14777.patch
Description: rb14777.patch


[PATCH]middle-end Add an RPO pass after successful vectorization

2021-11-02 Thread Tamar Christina via Gcc-patches
Hi All,

Following my current SVE predicate optimization series a problem has presented
itself in that the way vector masks are generated for masked operations relies
on CSE to share masks efficiently.

The issue however is that masking is done using the & operation and & is
associative and so reassoc decides to reassociate the masked operations.

This makes CSE then unable to CSE an unmasked and a masked operation leading to
duplicate operations being performed.

To counter this we want to add an RPO pass over the vectorized loop body when
vectorization succeeds.  This makes it then no longer reliant on the RTL level
CSE.

I have not added a testcase for this as it requires the changes in my patch
series, however the entire series relies on this patch to work so all the
tests there cover it.

Bootstrapped Regtested on aarch64-none-linux-gnu,
x86_64-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

* tree-vectorizer.c (vectorize_loops): Do local CSE through RPVN upon
successful vectorization.

--- inline copy of patch -- 
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 
4712dc6e7f907637774482a71036a0bd381c2bd2..1e370d60fb19b03c3b6bce45c660af4b6d32dc51
 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -81,7 +81,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-pretty-print.h"
 #include "opt-problem.h"
 #include "internal-fn.h"
-
+#include "tree-ssa-sccvn.h"
 
 /* Loop or bb location, with hotness information.  */
 dump_user_location_t vect_location;
@@ -1323,6 +1323,27 @@ vectorize_loops (void)
 ???  Also while we try hard to update loop-closed SSA form we fail
 to properly do this in some corner-cases (see PR56286).  */
   rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
+
+  for (i = 1; i < number_of_loops (cfun); i++)
+   {
+ loop = get_loop (cfun, i);
+ if (!loop || !single_exit (loop))
+   continue;
+
+ bitmap exit_bbs;
+ /* Perform local CSE, this esp. helps because we emit code for
+predicates that need to be shared for optimal predicate usage.
+However reassoc will re-order them and prevent CSE from working
+as it should.  CSE only the loop body, not the entry.  */
+ exit_bbs = BITMAP_ALLOC (NULL);
+ bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
+ bitmap_set_bit (exit_bbs, loop->latch->index);
+
+ do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);
+
+ BITMAP_FREE (exit_bbs);
+   }
+
   return TODO_cleanup_cfg;
 }
 


-- 
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 4712dc6e7f907637774482a71036a0bd381c2bd2..1e370d60fb19b03c3b6bce45c660af4b6d32dc51 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -81,7 +81,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-pretty-print.h"
 #include "opt-problem.h"
 #include "internal-fn.h"
-
+#include "tree-ssa-sccvn.h"
 
 /* Loop or bb location, with hotness information.  */
 dump_user_location_t vect_location;
@@ -1323,6 +1323,27 @@ vectorize_loops (void)
 	 ???  Also while we try hard to update loop-closed SSA form we fail
 	 to properly do this in some corner-cases (see PR56286).  */
   rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
+
+  for (i = 1; i < number_of_loops (cfun); i++)
+	{
+	  loop = get_loop (cfun, i);
+	  if (!loop || !single_exit (loop))
+	continue;
+
+	  bitmap exit_bbs;
+	  /* Perform local CSE, this esp. helps because we emit code for
+	 predicates that need to be shared for optimal predicate usage.
+	 However reassoc will re-order them and prevent CSE from working
+	 as it should.  CSE only the loop body, not the entry.  */
+	  exit_bbs = BITMAP_ALLOC (NULL);
+	  bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
+	  bitmap_set_bit (exit_bbs, loop->latch->index);
+
+	  do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);
+
+	  BITMAP_FREE (exit_bbs);
+	}
+
   return TODO_cleanup_cfg;
 }
 



[PATCH] Record that -gtoggle is already used in gcc_options.

2021-11-02 Thread Martin Liška

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

When doing flip based on -gtoggle, record it. Otherwise, we will
apply it for the second time in finish_options.

PR debug/102955

gcc/ChangeLog:

* common.opt: Add new gtoggle_used variable.
* opts.c (finish_options): Do not interpret flag_gtoggle twice.

gcc/testsuite/ChangeLog:

* gcc.dg/pr102955.c: New test.
---
 gcc/common.opt  |  4 
 gcc/opts.c  |  3 ++-
 gcc/testsuite/gcc.dg/pr102955.c | 14 ++
 3 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr102955.c

diff --git a/gcc/common.opt b/gcc/common.opt
index 1a5b9bfcca9..2568ecb98b8 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3316,6 +3316,10 @@ gdescribe-dies
 Common Driver Var(flag_describe_dies) Init(0)
 Add description attributes to some DWARF DIEs that have no name attribute.
 
+; True if -gtoggle option was already handled.
+Variable
+bool gtoggle_used
+
 gtoggle
 Common Driver Var(flag_gtoggle)
 Toggle debug information generation.
diff --git a/gcc/opts.c b/gcc/opts.c
index 3f80fce82bc..ef38b8dbab0 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -1375,8 +1375,9 @@ finish_options (struct gcc_options *opts, struct 
gcc_options *opts_set,
   profile_flag = 0;
 }
 
-  if (flag_gtoggle)
+  if (flag_gtoggle && !gtoggle_used)
 {
+  gtoggle_used = true;
   if (debug_info_level == DINFO_LEVEL_NONE)
{
  debug_info_level = DINFO_LEVEL_NORMAL;
diff --git a/gcc/testsuite/gcc.dg/pr102955.c b/gcc/testsuite/gcc.dg/pr102955.c
new file mode 100644
index 000..de9689edec4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr102955.c
@@ -0,0 +1,14 @@
+/* PR debug/102955 */
+/* { dg-do compile } */
+/* { dg-options "-g -gtoggle" } */
+
+#pragma GCC optimize "0"
+struct j
+{
+  explicit j ();
+  ~j ();
+};
+void g (void)
+{
+  new j();
+}
--
2.33.1



Re: [PATCH] Add debug counters to back threader.

2021-11-02 Thread Richard Biener via Gcc-patches
On Tue, Nov 2, 2021 at 2:36 PM Aldy Hernandez  wrote:
>
> On Tue, Nov 2, 2021 at 2:27 PM Richard Biener
>  wrote:
> >
> > On Mon, Nov 1, 2021 at 2:03 PM Jeff Law via Gcc-patches
> >  wrote:
> > >
> > >
> > >
> > > On 11/1/2021 3:54 AM, Aldy Hernandez wrote:
> > > > Chasing down stage3 miscomparisons is never fun, and having no way to
> > > > distinguish between jump threads registered by a particular
> > > > pass, is even harder.  This patch adds debug counters for the individual
> > > > back threading passes.  I've left the ethread pass alone, as that one is
> > > > usually benign, but we could easily add it if needed.
> > > >
> > > > The fact that we can only pass one boolean argument to the passes
> > > > infrastructure has us do all sorts of gymnastics to differentiate
> > > > between the various back threading passes.
> > > >
> > > > Tested on x86-64 Linux.
> > > >
> > > > OK?
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > >   * dbgcnt.def: Add debug counter for back_thread[12] and
> > > >   back_threadfull[12].
> > > >   * passes.def: Pass "first" argument to each back threading pass.
> > > >   * tree-ssa-threadbackward.c (back_threader::back_threader): Add
> > > >   first argument.
> > > >   (back_threader::debug_counter): New.
> > > >   (back_threader::maybe_register_path): Call debug_counter.
> > > OK
> >
> > But it's ugly.  Very.  Why isn't a single debug-counter good enough?
> > You should be able to reduce to a single threading pass via
> > -fdisable-tree-xyz and then bisect with the debug counter.
>
> Indeed.  I'm not a big fan either.
>
> The -fdisable-tree-xyz approach is my first line of defense, but
> sometimes the problem is a combination of threading passes working in
> tandem.  For example, thread1 threads a path that causes a later
> thread99 pass to find another path.  So I can't just have one counter.
> We need to be able to bisect the thread1 path, and then, if there's
> still a problem, bisect the thread99 pass.
>
> I was fighting a bootstrap miscomparison bug when I could reduce the
> problem to 2 threading passes, and then further to thread1's 123 path,
> and thread2's 567 and 890 paths.  Not fun.

Btw, you can now do -fdbg-cnt=thread:5:892-1 to continue
bisecting the number space of thread2 after fixing '5' for thread1.

And -fdbg-cnt-list will tell you the upper bound of the counters
(for the 1).

But yes, not fun.  But really "bisecting" multiple counters at the same
time doesn't work better than "bisecting" a single counter into
multiple slices.

Richard.

> Aldy
>
> >
> > Alternatively at least store the debug counter to query somewhere
> > so you can have the "hot" path query a single one.  So instead of
> >
> > if (!dbg_cnt (back_thread1))
> >
> > do
> >
> > if (!dbg_cnt (curr_cnt))
> >
> > and compute curr_cnt somewhere.
> >
> > Richard.
> >
> > > jeff
> > >
> >
>


Re: [PATCH] PR fortran/91497 -- Silence conversion warnings for MIN1 and MAX1

2021-11-02 Thread Manfred Schwarb via Gcc-patches
Am 02.11.21 um 14:26 schrieb Thomas Koenig:
> Hi Manfred,
>
>> In addition to the patches of Steve Kargl for PR 91497:
>> The MIN1 and MAX1 intrinsics do explicit type conversions and should
>> be silenced too for -Wconversion and -Wconversion-extra.
>>
>> Adjust testcase to only use *4 and *8 real types, provide a second
>> testcase for *10 and *16 precisions.
> Two points:
>
> We should modify existing test cases only when necessary, because
> modification can impede a regression test.  It is better to create
> a new one.
>

Yes, but this was a quick-and-dirty test of mine, and I realized only afterwards
that Steve had used it as-is. The new testcase is more consistent and more 
complete.
Sandra got errors on targets without REAL(16) support and requested changes,
so I decided to split it.

So you want me to "split" it in 3 parts?
- existing test as is, only for targets with REAL(16) support
- additional tests incl. complex intrinsics for targets with REAL(16) support
- additional tests incl. complex intrinsics for all targets, only single and 
double precision

OTOH, it is perhaps not worth the trouble to do REAL(10) and REAL(16) tests, 
either
it warns or it does not.

> While we do recognize real*4 and real*8 and so on, these are
> non-standard extensions, and I would like to avoid to have these
> with new test cases.
>
> Instead of real*8, you can use real(8) or double precision.
>

Well, double precision is deprecated AFAIK.

> OK with these changes to the test cases.
>
> Thanks for the patch!
>
> Best regards
>
> Thomas



Re: [PATCH]middle-end Add an RPO pass after successful vectorization

2021-11-02 Thread Richard Biener via Gcc-patches
On Tue, 2 Nov 2021, Tamar Christina wrote:

> Hi All,
> 
> Following my current SVE predicate optimization series a problem has presented
> itself in that the way vector masks are generated for masked operations relies
> on CSE to share masks efficiently.
> 
> The issue however is that masking is done using the & operand and & is
> associative and so reassoc decides to reassociate the masked operations.

But it does this for the purpose of canonicalization and thus CSE.

> This makes CSE then unable to CSE an unmasked and a masked operation leading 
> to
> duplicate operations being performed.
> 
> To counter this we want to add an RPO pass over the vectorized loop body when
> vectorization succeeds.  This makes it then no longer reliant on the RTL level
> CSE.
> 
> I have not added a testcase for this as it requires the changes in my patch
> series, however the entire series relies on this patch to work so all the
> tests there cover it.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu,
> x86_64-linux-gnu and no issues.
> 
> Ok for master?

You are running VN over _all_ loop bodies rather only those vectorized.
We loop over vectorized loops earlier for optimizing masked store
sequences.  I suppose you could hook in there.  I'll also notice
that we have pass_pre_slp_scalar_cleanup which eventually runs
plus we have a late FRE.  So I don't understand why it doesn't
work to CSE later.

  for (i = 1; i < number_of_loops (cfun); i++)
{
  loop_vec_info loop_vinfo;
  bool has_mask_store;

  loop = get_loop (cfun, i);
  if (!loop || !loop->aux)
continue;
  loop_vinfo = (loop_vec_info) loop->aux;
  has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
  delete loop_vinfo;
  if (has_mask_store
  && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
optimize_mask_stores (loop);
  loop->aux = NULL;
}


> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>   * tree-vectorizer.c (vectorize_loops): Do local CSE through RPVN upon
>   successful vectorization.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
> index 
> 4712dc6e7f907637774482a71036a0bd381c2bd2..1e370d60fb19b03c3b6bce45c660af4b6d32dc51
>  100644
> --- a/gcc/tree-vectorizer.c
> +++ b/gcc/tree-vectorizer.c
> @@ -81,7 +81,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "gimple-pretty-print.h"
>  #include "opt-problem.h"
>  #include "internal-fn.h"
> -
> +#include "tree-ssa-sccvn.h"
>  
>  /* Loop or bb location, with hotness information.  */
>  dump_user_location_t vect_location;
> @@ -1323,6 +1323,27 @@ vectorize_loops (void)
>???  Also while we try hard to update loop-closed SSA form we fail
>to properly do this in some corner-cases (see PR56286).  */
>rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
> +
> +  for (i = 1; i < number_of_loops (cfun); i++)
> + {
> +   loop = get_loop (cfun, i);
> +   if (!loop || !single_exit (loop))
> + continue;
> +
> +   bitmap exit_bbs;
> +   /* Perform local CSE, this esp. helps because we emit code for
> +  predicates that need to be shared for optimal predicate usage.
> +  However reassoc will re-order them and prevent CSE from working
> +  as it should.  CSE only the loop body, not the entry.  */
> +   exit_bbs = BITMAP_ALLOC (NULL);
> +   bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
> +   bitmap_set_bit (exit_bbs, loop->latch->index);
> +
> +   do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);
> +
> +   BITMAP_FREE (exit_bbs);
> + }
> +
>return TODO_cleanup_cfg;
>  }
>  
> 
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Ivo Totev; HRB 36809 (AG Nuernberg)


Re: [PATCH] Record that -gtoggle is already used in gcc_options.

2021-11-02 Thread Richard Biener via Gcc-patches
On Tue, Nov 2, 2021 at 3:11 PM Martin Liška  wrote:
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>
> Ready to be installed?

I think -gtoggle matches a Defered option and thus should be processed
in handle_common_deferred_options.  I'd argue that --help printing shouldn't
be part of decode_options but done only in toplev.c (not from the
parse_optimize_options caller).

Richard.

> Thanks,
> Martin
>
> When doing flip based on -gtoggle, record it. Otherwise, we will
> apply it for the second time in finish_options.
>
> PR debug/102955
>
> gcc/ChangeLog:
>
> * common.opt: Add new gtoggle_used variable.
> * opts.c (finish_options): Do not interpret flag_gtoggle twice.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/pr102955.c: New test.
> ---
>   gcc/common.opt  |  4 
>   gcc/opts.c  |  3 ++-
>   gcc/testsuite/gcc.dg/pr102955.c | 14 ++
>   3 files changed, 20 insertions(+), 1 deletion(-)
>   create mode 100644 gcc/testsuite/gcc.dg/pr102955.c
>
> diff --git a/gcc/common.opt b/gcc/common.opt
> index 1a5b9bfcca9..2568ecb98b8 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -3316,6 +3316,10 @@ gdescribe-dies
>   Common Driver Var(flag_describe_dies) Init(0)
>   Add description attributes to some DWARF DIEs that have no name attribute.
>
> +; True if -gtoggle option was already handled.
> +Variable
> +bool gtoggle_used
> +
>   gtoggle
>   Common Driver Var(flag_gtoggle)
>   Toggle debug information generation.
> diff --git a/gcc/opts.c b/gcc/opts.c
> index 3f80fce82bc..ef38b8dbab0 100644
> --- a/gcc/opts.c
> +++ b/gcc/opts.c
> @@ -1375,8 +1375,9 @@ finish_options (struct gcc_options *opts, struct 
> gcc_options *opts_set,
> profile_flag = 0;
>   }
>
> -  if (flag_gtoggle)
> +  if (flag_gtoggle && !gtoggle_used)
>   {
> +  gtoggle_used = true;
> if (debug_info_level == DINFO_LEVEL_NONE)
> {
>   debug_info_level = DINFO_LEVEL_NORMAL;
> diff --git a/gcc/testsuite/gcc.dg/pr102955.c b/gcc/testsuite/gcc.dg/pr102955.c
> new file mode 100644
> index 000..de9689edec4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr102955.c
> @@ -0,0 +1,14 @@
> +/* PR debug/102955 */
> +/* { dg-do compile } */
> +/* { dg-options "-g -gtoggle" } */
> +
> +#pragma GCC optimize "0"
> +struct j
> +{
> +  explicit j ();
> +  ~j ();
> +};
> +void g (void)
> +{
> +  new j();
> +}
> --
> 2.33.1
>


RE: [PATCH]middle-end Add an RPO pass after successful vectorization

2021-11-02 Thread Tamar Christina via Gcc-patches
> -Original Message-
> From: Richard Biener 
> Sent: Tuesday, November 2, 2021 2:24 PM
> To: Tamar Christina 
> Cc: gcc-patches@gcc.gnu.org; nd 
> Subject: Re: [PATCH]middle-end Add an RPO pass after successful
> vectorization
> 
> On Tue, 2 Nov 2021, Tamar Christina wrote:
> 
> > Hi All,
> >
> > Following my current SVE predicate optimization series a problem has
> > presented itself in that the way vector masks are generated for masked
> > operations relies on CSE to share masks efficiently.
> >
> > The issue however is that masking is done using the & operand and & is
> > associative and so reassoc decides to reassociate the masked operations.
> 
> But it does this for the purpose of canonicalization and thus CSE.

Yes, but it turns something like

(a & b) & mask into a & (b & mask).

When (a & b) is used somewhere else you now lose the CSE.  So it's actually
hurting in this case.

> 
> > This makes CSE then unable to CSE an unmasked and a masked operation
> > leading to duplicate operations being performed.
> >
> > To counter this we want to add an RPO pass over the vectorized loop
> > body when vectorization succeeds.  This makes it then no longer
> > reliant on the RTL level CSE.
> >
> > I have not added a testcase for this as it requires the changes in my
> > patch series, however the entire series relies on this patch to work
> > so all the tests there cover it.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-linux-gnu and
> > no issues.
> >
> > Ok for master?
> 
> You are running VN over _all_ loop bodies rather only those vectorized.
> We loop over vectorized loops earlier for optimizing masked store sequences.
> I suppose you could hook in there.  I'll also notice that we have
> pass_pre_slp_scalar_cleanup which eventually runs plus we have a late FRE.
> So I don't understand why it doesn't work to CSE later.
> 

Atm, say you have the conditions a > b, and a > b & a > c

We generate

mask1 = (a > b) & loop_mask
mask2 = (a > b & a > c) & loop_mask

with the intention that mask1 can be re-used in mask2.

Reassoc changes this to mask2 = a > b & (a > c & loop_mask)

Which has now unmasked (a > b) in mask2, which leaves us unable to combine
the mask1 and mask2.  It doesn't generate incorrect code, just inefficient.

>   for (i = 1; i < number_of_loops (cfun); i++)
> {
>   loop_vec_info loop_vinfo;
>   bool has_mask_store;
> 
>   loop = get_loop (cfun, i);
>   if (!loop || !loop->aux)
> continue;
>   loop_vinfo = (loop_vec_info) loop->aux;
>   has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
>   delete loop_vinfo;
>   if (has_mask_store
>   && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
> optimize_mask_stores (loop);
>   loop->aux = NULL;
> }
> 

Ah thanks, I'll make the changes.

Thanks,
Tamar

> 
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > * tree-vectorizer.c (vectorize_loops): Do local CSE through RPVN
> upon
> > successful vectorization.
> >
> > --- inline copy of patch --
> > diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index
> >
> 4712dc6e7f907637774482a71036a0bd381c2bd2..1e370d60fb19b03c3b6bce45c
> 660
> > af4b6d32dc51 100644
> > --- a/gcc/tree-vectorizer.c
> > +++ b/gcc/tree-vectorizer.c
> > @@ -81,7 +81,7 @@ along with GCC; see the file COPYING3.  If not see
> > #include "gimple-pretty-print.h"
> >  #include "opt-problem.h"
> >  #include "internal-fn.h"
> > -
> > +#include "tree-ssa-sccvn.h"
> >
> >  /* Loop or bb location, with hotness information.  */
> > dump_user_location_t vect_location; @@ -1323,6 +1323,27 @@
> > vectorize_loops (void)
> >  ???  Also while we try hard to update loop-closed SSA form we fail
> >  to properly do this in some corner-cases (see PR56286).  */
> >rewrite_into_loop_closed_ssa (NULL,
> > TODO_update_ssa_only_virtuals);
> > +
> > +  for (i = 1; i < number_of_loops (cfun); i++)
> > +   {
> > + loop = get_loop (cfun, i);
> > + if (!loop || !single_exit (loop))
> > +   continue;
> > +
> > + bitmap exit_bbs;
> > + /* Perform local CSE, this esp. helps because we emit code for
> > +predicates that need to be shared for optimal predicate usage.
> > +However reassoc will re-order them and prevent CSE from working
> > +as it should.  CSE only the loop body, not the entry.  */
> > + exit_bbs = BITMAP_ALLOC (NULL);
> > + bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
> > + bitmap_set_bit (exit_bbs, loop->latch->index);
> > +
> > + do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);
> > +
> > + BITMAP_FREE (exit_bbs);
> > +   }
> > +
> >return TODO_cleanup_cfg;
> >  }
> >
> >
> >
> >
> 
> --
> Richard Biener 
> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409
> Nuernberg, Germany; GF: Ivo Totev; HRB 36809 (AG Nuernberg)


Re: [PATCH 0/5] Fortran manual updates

2021-11-02 Thread Sandra Loosemore

On 11/2/21 2:51 AM, Martin Liška wrote:

On 11/2/21 00:56, Sandra Loosemore wrote:

I'll wait a couple days before committing these patches, in case
anybody wants to give some feedback, especially on technical issues.


Hello.

Appreciate the work you did, but the patchset will cause quite some 
conflicts

in the prepared Sphinx migration patch I've sent to the mailing list :/
Anyway, I will rebase my patches. For the future, are you planning doing 
similar
documentation reorganization for a manual? Based on discussion with 
Gerald, I hope

we can finish the transition before the end of this year.


My understanding was that, if this conversion is indeed going to happen, 
it's going to be automated by scripts?  I hadn't seen any discussion of 
it on the list for months and thought the whole idea was on hold or 
scrapped, since it hasn't happened yet.  In any case it does not seem 
reasonable to freeze the current Texinfo docs for months while waiting 
for it to happen, especially as we are heading into the end of the 
release cycle and people are finishing up changes and new features they 
need to document.


-Sandra


[PATCH] IBM Z: ldist-{rawmemchr, strlen} tests require vector extensions

2021-11-02 Thread Stefan Schulze Frielinghaus via Gcc-patches
The tests require vector extensions which are only available for z13 and
later while using the z/Architecture.

Bootstrapped and regtested on IBM Z.  Ok for mainline?

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/ldist-rawmemchr-1.c: For IBM Z set arch to z13
and use z/Architecture since the tests require vector extensions.
* gcc.dg/tree-ssa/ldist-rawmemchr-2.c: Likewise.
* gcc.dg/tree-ssa/ldist-strlen-1.c: Likewise.
* gcc.dg/tree-ssa/ldist-strlen-3.c: Likewise.
---
 gcc/testsuite/gcc.dg/tree-ssa/ldist-rawmemchr-1.c | 1 +
 gcc/testsuite/gcc.dg/tree-ssa/ldist-rawmemchr-2.c | 1 +
 gcc/testsuite/gcc.dg/tree-ssa/ldist-strlen-1.c| 1 +
 gcc/testsuite/gcc.dg/tree-ssa/ldist-strlen-3.c| 1 +
 4 files changed, 4 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-rawmemchr-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ldist-rawmemchr-1.c
index 6abfd278351..bf6335f6360 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-rawmemchr-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-rawmemchr-1.c
@@ -1,5 +1,6 @@
 /* { dg-do run { target s390x-*-* } } */
 /* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-details" } */
+/* { dg-additional-options "-march=z13 -mzarch" { target s390x-*-* } } */
 /* { dg-final { scan-tree-dump-times "generated rawmemchrQI" 2 "ldist" { 
target s390x-*-* } } } */
 /* { dg-final { scan-tree-dump-times "generated rawmemchrHI" 2 "ldist" { 
target s390x-*-* } } } */
 /* { dg-final { scan-tree-dump-times "generated rawmemchrSI" 2 "ldist" { 
target s390x-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-rawmemchr-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ldist-rawmemchr-2.c
index 00d6ea0f8e9..83f5a35a322 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-rawmemchr-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-rawmemchr-2.c
@@ -1,5 +1,6 @@
 /* { dg-do run { target s390x-*-* } } */
 /* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-details" } */
+/* { dg-additional-options "-march=z13 -mzarch" { target s390x-*-* } } */
 /* { dg-final { scan-tree-dump-times "generated rawmemchrQI" 2 "ldist" { 
target s390x-*-* } } } */
 /* { dg-final { scan-tree-dump-times "generated rawmemchrHI" 2 "ldist" { 
target s390x-*-* } } } */
 /* { dg-final { scan-tree-dump-times "generated rawmemchrSI" 2 "ldist" { 
target s390x-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-strlen-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ldist-strlen-1.c
index 918b60099e4..aeb04b91f6b 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-strlen-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-strlen-1.c
@@ -1,5 +1,6 @@
 /* { dg-do run } */
 /* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-details" } */
+/* { dg-additional-options "-march=z13 -mzarch" { target s390x-*-* } } */
 /* { dg-final { scan-tree-dump-times "generated strlenQI\n" 4 "ldist" } } */
 /* { dg-final { scan-tree-dump-times "generated strlenHI\n" 4 "ldist" { target 
s390x-*-* } } } */
 /* { dg-final { scan-tree-dump-times "generated strlenSI\n" 4 "ldist" { target 
s390x-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-strlen-3.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ldist-strlen-3.c
index 370fd5eb088..0652857265a 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-strlen-3.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-strlen-3.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-details" } */
+/* { dg-additional-options "-march=z13 -mzarch" { target s390x-*-* } } */
 /* { dg-final { scan-tree-dump-times "generated strlenSI\n" 1 "ldist" { target 
s390x-*-* } } } */
 
 extern int s[];
-- 
2.31.1



Re: [PATCH] dwarf: Multi-register CFI address support.

2021-11-02 Thread Hafiz Abid Qadeer
Ping.

On 24/08/2021 16:55, Hafiz Abid Qadeer wrote:
> Ping.
> 
> On 22/07/2021 11:58, Hafiz Abid Qadeer wrote:
>> Ping.
>>
>> On 13/06/2021 14:27, Hafiz Abid Qadeer wrote:
>>> Add support for architectures such as AMD GCN, in which the pointer size is
>>> larger than the register size.  This allows the CFI information to include
>>> multi-register locations for the stack pointer, frame pointer, and return
>>> address.
>>>
>>> This patch was originally posted by Andrew Stubbs in
>>> https://gcc.gnu.org/pipermail/gcc-patches/2020-August/552873.html
>>>
>>> It has now been re-worked according to the review comments. It does not use
>>> DW_OP_piece or DW_OP_LLVM_piece_end. Instead it uses
>>> DW_OP_bregx/DW_OP_shl/DW_OP_bregx/DW_OP_plus to build the CFA from multiple
>>> consecutive registers. Here is how .debug_frame looks before and after this
>>> patch:
>>>
>>> $ cat factorial.c
>>> int factorial(int n) {
>>>   if (n == 0) return 1;
>>>   return n * factorial (n - 1);
>>> }
>>>
>>> $ amdgcn-amdhsa-gcc -g factorial.c -O0 -c -o fac.o
>>> $ llvm-dwarfdump -debug-frame fac.o
>>>
>>> *** without this patch (edited for brevity)***
>>>
>>>  0014  CIE
>>>
>>>   DW_CFA_def_cfa: reg48 +0
>>>   DW_CFA_register: reg16 reg50
>>>
>>> 0018 002c  FDE cie= pc=...01ac
>>>   DW_CFA_advance_loc4: 96
>>>   DW_CFA_offset: reg46 0
>>>   DW_CFA_offset: reg47 4
>>>   DW_CFA_offset: reg50 8
>>>   DW_CFA_offset: reg51 12
>>>   DW_CFA_offset: reg16 8
>>>   DW_CFA_advance_loc4: 4
>>>   DW_CFA_def_cfa_sf: reg46 -16
>>>
>>> *** with this patch (edited for brevity)***
>>>
>>>  0024  CIE
>>>
>>>   DW_CFA_def_cfa_expression: DW_OP_bregx SGPR49+0, DW_OP_const1u 0x20, 
>>> DW_OP_shl, DW_OP_bregx SGPR48+0, DW_OP_plus
>>>   DW_CFA_expression: reg16 DW_OP_bregx SGPR51+0, DW_OP_const1u 0x20, 
>>> DW_OP_shl, DW_OP_bregx SGPR50+0, DW_OP_plus
>>>
>>> 0028 003c  FDE cie= pc=...01ac
>>>   DW_CFA_advance_loc4: 96
>>>   DW_CFA_offset: reg46 0
>>>   DW_CFA_offset: reg47 4
>>>   DW_CFA_offset: reg50 8
>>>   DW_CFA_offset: reg51 12
>>>   DW_CFA_offset: reg16 8
>>>   DW_CFA_advance_loc4: 4
>>>   DW_CFA_def_cfa_expression: DW_OP_bregx SGPR47+0, DW_OP_const1u 0x20, 
>>> DW_OP_shl, DW_OP_bregx SGPR46+0, DW_OP_plus, DW_OP_lit16, DW_OP_minus
>>>
>>> gcc/ChangeLog:
>>>
>>> * dwarf2cfi.c (dw_stack_pointer_regnum): Change type to struct cfa_reg.
>>> (dw_frame_pointer_regnum): Likewise.
>>> (new_cfi_row): Use set_by_dwreg.
>>> (get_cfa_from_loc_descr): Use set_by_dwreg.  Support register spans.
>>> handle DW_OP_bregx with DW_OP_breg{0-31}. Support DW_OP_lit*,
>>> DW_OP_const*, DW_OP_minus, DW_OP_shl and DW_OP_plus.
>>> (lookup_cfa_1): Use set_by_dwreg.
>>> (def_cfa_0): Update for cfa_reg and support register spans.
>>> (reg_save): Change sreg parameter to struct cfa_reg.  Support register
>>> spans.
>>> (dwf_cfa_reg): New function.
>>> (dwarf2out_flush_queued_reg_saves): Use dwf_cfa_reg instead of
>>> dwf_regno.
>>> (dwarf2out_frame_debug_def_cfa): Likewise.
>>> (dwarf2out_frame_debug_adjust_cfa): Likewise.
>>> (dwarf2out_frame_debug_cfa_offset): Likewise.  Update reg_save usage.
>>> (dwarf2out_frame_debug_cfa_register): Likewise.
>>> (dwarf2out_frame_debug_expr): Likewise.
>>> (create_pseudo_cfg): Use set_by_dwreg.
>>> (initial_return_save): Use set_by_dwreg and dwf_cfa_reg,
>>> (create_cie_data): Use dwf_cfa_reg.
>>> (execute_dwarf2_frame): Use dwf_cfa_reg.
>>> (dump_cfi_row): Use set_by_dwreg.
>>> * dwarf2out.c (build_span_loc, build_breg_loc): New function.
>>> (build_cfa_loc): Support register spans.
>>> (build_cfa_aligned_loc): Update cfa_reg usage.
>>> (convert_cfa_to_fb_loc_list): Use set_by_dwreg.
>>> * dwarf2out.h (struct cfa_reg): New type.
>>> (struct dw_cfa_location): Use struct cfa_reg.
>>> (build_span_loc): New prototype.
>>> * gengtype.c (main): Accept poly_uint16_pod type.
>>> ---
>>>  gcc/dwarf2cfi.c | 260 
>>>  gcc/dwarf2out.c |  55 +-
>>>  gcc/dwarf2out.h |  37 ++-
>>>  gcc/gengtype.c  |   1 +
>>>  4 files changed, 283 insertions(+), 70 deletions(-)
>>>
>>> diff --git a/gcc/dwarf2cfi.c b/gcc/dwarf2cfi.c
>>> index c27ac1960b0..5aacdcd094a 100644
>>> --- a/gcc/dwarf2cfi.c
>>> +++ b/gcc/dwarf2cfi.c
>>> @@ -229,8 +229,8 @@ static vec queued_reg_saves;
>>>  static bool any_cfis_emitted;
>>>  
>>>  /* Short-hand for commonly used register numbers.  */
>>> -static unsigned dw_stack_pointer_regnum;
>>> -static unsigned dw_frame_pointer_regnum;
>>> +static struct cfa_reg dw_stack_pointer_regnum;
>>> +static struct cfa_reg dw_frame_pointer_regnum;
>>>  
>>>  /* Hook used by __throw.  */
>>>  
>>> @@ -430,7 +430,7 @@ new_cfi_row (void)
>>>  {
>>>dw_cfi_row *row = ggc_cleared_alloc ();
>>>  
>>> -  row->cfa.reg = INVALID_REGNUM;
>>> +  row->cfa.reg.set_by_dwreg 

Re: [PATCH] RISC-V: Fix build errors with shNadd/shNadd.uw patterns in zba cost model

2021-11-02 Thread Kito Cheng via Gcc-patches
Hi Maciej:

On Mon, Nov 1, 2021 at 8:41 PM Maciej W. Rozycki  wrote:
>
> Fix a build regression from commit 04a9b554ba1a ("RISC-V: Cost model
> for zba extension."):
>
> .../gcc/config/riscv/riscv.c: In function 'bool riscv_rtx_costs(rtx, 
> machine_mode, int, int, int*, bool)':
> .../gcc/config/riscv/riscv.c:2018:11: error: 'and' of mutually exclusive 
> equal-tests is always 0 [-Werror]
>  2018 |   && IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 0)), 1, 3))
>   |   ^~
> .../gcc/config/riscv/riscv.c:2047:17: error: unused variable 'ashift_lhs' 
> [-Werror=unused-variable]
>  2047 | rtx ashift_lhs = XEXP (and_lhs, 0);
>   | ^~
>
>
> by removing an incorrect REG_P check applied to a constant expression
> and getting rid of the unused variable.
>
> gcc/
> * config/riscv/riscv.c (riscv_rtx_costs): Remove a REG_P check
> and an unused local variable with shNadd/shNadd.uw pattern
> handling.
> ---
> Hi,
>
>  As described above and I guess almost obvious -- I gather the code was
> only verified with a `-Wno-error' build and the handling of the shNadd
> pattern has not been actually covered owing to this bug making the
> condition impossible to match.
>
>  OK to apply then?
>
>   Maciej
> ---
>  gcc/config/riscv/riscv.c |2 --
>  1 file changed, 2 deletions(-)
>
> gcc-riscv-rtx-costs-zba-shnadd.diff
> Index: gcc/gcc/config/riscv/riscv.c
> ===
> --- gcc.orig/gcc/config/riscv/riscv.c
> +++ gcc/gcc/config/riscv/riscv.c
> @@ -2013,7 +2013,6 @@ riscv_rtx_costs (rtx x, machine_mode mod
>   && ((!TARGET_64BIT && (mode == SImode)) ||
>   (TARGET_64BIT && (mode == DImode)))
>   && (GET_CODE (XEXP (x, 0)) == ASHIFT)
> - && REG_P (XEXP (XEXP (x, 0), 0))
>   && CONST_INT_P (XEXP (XEXP (x, 0), 0))
>   && IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 0)), 1, 3))

I think that's my mistake...it should fix the following check rather than
removing the REG_P check like that:

@@ -2014,8 +2014,8 @@ riscv_rtx_costs (rtx x, machine_mode mode, int
outer_code, int opno ATTRIBUTE_UN
 (TARGET_64BIT && (mode == DImode)))
 && (GET_CODE (XEXP (x, 0)) == ASHIFT)
 && REG_P (XEXP (XEXP (x, 0), 0))
- && CONST_INT_P (XEXP (XEXP (x, 0), 0))
- && IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 0)), 1, 3))
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+ && IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1)), 1, 3))
   {
 *total = COSTS_N_INSNS (1);
 return true;


shNadd pattern:

(define_insn "*shNadd"
 [(set (match_operand:X 0 "register_operand" "=r")
   (plus:X (ashift:X (match_operand:X 1 "register_operand" "r")
 # What I want to check is here, it should be
XEXP (XEXP (x, 0), 1)
 (match_operand:QI 2 "immediate_operand" "I"))
   (match_operand:X 3 "register_operand" "r")))]

Otherwise LGTM, feel free to commit once you address this issue.

Thanks!


Re: [PATCH] IBM Z: ldist-{rawmemchr,strlen} tests require vector extensions

2021-11-02 Thread Andreas Krebbel via Gcc-patches
On 11/2/21 15:54, Stefan Schulze Frielinghaus wrote:
> The tests require vector extensions which are only available for z13 and
> later while using the z/Architecture.
> 
> Bootstrapped and regtested on IBM Z.  Ok for mainline?
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/tree-ssa/ldist-rawmemchr-1.c: For IBM Z set arch to z13
>   and use z/Architecture since the tests require vector extensions.
>   * gcc.dg/tree-ssa/ldist-rawmemchr-2.c: Likewise.
>   * gcc.dg/tree-ssa/ldist-strlen-1.c: Likewise.
>   * gcc.dg/tree-ssa/ldist-strlen-3.c: Likewise.

Ok. Thanks!

Andreas


Re: [PATCH 2/5]AArch64 sve: combine nested if predicates

2021-11-02 Thread Richard Sandiford via Gcc-patches
Tamar Christina  writes:
> Hi All,
>
> Here’s a respin of the patch.
>
> The following example
>
> void f5(float * restrict z0, float * restrict z1, float *restrict x,
>   float * restrict y, float c, int n)
> {
> for (int i = 0; i < n; i++) {
> float a = x[i];
> float b = y[i];
> if (a > b) {
> z0[i] = a + b;
> if (a > c) {
> z1[i] = a - b;
> }
> }
> }
> }
>
> generates currently:
>
> ptrue   p3.b, all
> ld1wz1.s, p1/z, [x2, x5, lsl 2]
> ld1wz2.s, p1/z, [x3, x5, lsl 2]
> fcmgt   p0.s, p3/z, z1.s, z0.s
> fcmgt   p2.s, p1/z, z1.s, z2.s
> fcmgt   p0.s, p0/z, z1.s, z2.s
> and p0.b, p0/z, p1.b, p1.b
>
> The conditions for a > b and a > c become separate comparisons.
>
> After this patch we generate:
>
> ld1wz1.s, p0/z, [x2, x5, lsl 2]
> ld1wz2.s, p0/z, [x3, x5, lsl 2]
> fcmgt   p1.s, p0/z, z1.s, z2.s
> fcmgt   p1.s, p1/z, z1.s, z0.s
>
> Where the condition a > b && a > c are folded by using the predicate result of
> the previous compare and thus allows the removal of one of the compares.
>
> When never a mask is being generated from an BIT_AND we mask the operands of
> the and instead and then just AND the result.
>
> This allows us to be able to CSE the masks and generate the right combination.
> However because re-assoc will try to re-order the masks in the & we have to 
> now
> perform a small local CSE on the vectorized loop is vectorization is 
> successful.
>
> Note: This patch series is working incrementally towards generating the most
>   efficient code for this and other loops in small steps.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-linux-gnu and no 
> issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>   * tree-vect-stmts.c (prepare_load_store_mask): When combining two masks
>   mask the operands instead of the combined operation.
>
> gcc/testsuite/ChangeLog:
>
>   * gcc.target/aarch64/sve/pred-combine-and.c: New test.
>
> --- inline copy of patch ---
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c
> new file mode 100644
> index 
> ..ed7fb591ec69dbdafe27fc9aa08a0b0910c94003
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-combine-and.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 --save-temps" } */
> +
> +void f5(float * restrict z0, float * restrict z1, float *restrict x, float * 
> restrict y, float c, int n)
> +{
> +for (int i = 0; i < n; i++) {
> +float a = x[i];
> +float b = y[i];
> +if (a > b) {
> +z0[i] = a + b;
> +if (a > c) {
> +z1[i] = a - b;
> +}
> +}
> +}
> +}
> +
> +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-9]+/z, 
> z[0-9]+\.s, z[0-9]+\.s} 2 } } */
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 
> 1f56e10709e8f27d768c04f7ef914e2cd9347c36..27ee48aea429810a3d907435a92b8fd1817d
>  100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -6302,10 +6302,39 @@ vectorizable_operation (vec_info *vinfo,
>   }
>else
>   {
> +   /* When combining two masks check is either of them has already been
> +  combined with a loop mask, if that's the case we can mark that the

I think something like:
   s/has already been combined with/is elsewhere combined with/
might be clearer.  The other statement that ANDs with the loop mask might
come before or after this one.  In the latter case it won't have been
vectorised yet.

> +  new combined mask doesn't need to be combined with a loop mask.  */

The way I imagined this last part working is that, after vectorising
a BIT_AND_EXPR like this:

  vop0' = vop & loop_mask_N
  vres = vop0' & vop1

we should record that vres is effectively already ANDed with loop_mask_N,
and so there's no need to create:

  vres' = vres & loop_mask_N

when vectorising the innermost IFN_MASK_STORE.

This could be a new hash_set in the loop_vec_info, containing
(condition, loop_mask) pairs for which condition & loop_mask == condition.
prepare_load_store_mask could check whether (vec_mask, loop_mask) is in
this set and return vec_mask unaltered if so.

This new set would in a sense have the opposite effect of
scalar_cond_masked_key.  scalar_cond_masked_key encourage statements
to create ANDs with the loop mask in cases where they might not have
done otherwise.  The new set would instead tell statements (and
specifically prepare_load_store_mask) that such ANDs aren't necessary.
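
In stand-alone C++ terms the bookkeeping could look roughly like this sketch
(std::set and the stand-in types replace GCC's hash_set and SSA names; none
of these names exist in the tree):

#include <set>
#include <utility>

typedef const void *ssa_name;   /* stand-in for a vectorized mask SSA name */

/* (mask, loop_mask) pairs for which mask & loop_mask == mask.  */
static std::set<std::pair<ssa_name, ssa_name> > anded_with_loop_mask;

/* prepare_load_store_mask-style helper: only emit the AND when we do not
   already know the mask is covered by this loop mask.  */
static ssa_name
maybe_and_with_loop_mask (ssa_name vec_mask, ssa_name loop_mask,
                          ssa_name (*emit_and) (ssa_name, ssa_name))
{
  if (anded_with_loop_mask.count (std::make_pair (vec_mask, loop_mask)))
    return vec_mask;
  ssa_name combined = emit_and (vec_mask, loop_mask);
  anded_with_loop_mask.insert (std::make_pair (combined, loop_mask));
  return combined;
}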

I guess we should also pass other ANDs with the loop mask through
prepare_load_store_mask (and rename it), so that they get the benefit too.

It looks like the patch tried to do that by entering scalar_dest
i

Re: [PATCH] Record that -gtoggle is already used in gcc_options.

2021-11-02 Thread Martin Liška

On 11/2/21 15:33, Richard Biener wrote:

I think -gtoggle matches a Defered option and thus should be processed
in handle_common_deferred_options.


Well, that's quite problematic as handle_common_deferred_options is called
after decode_options (which calls finish_options).

Note there's direct dependency at very end of finish_options in between -gtoggle
and debug_nonbind_markers_p:


  if (flag_gtoggle)
{
  if (debug_info_level == DINFO_LEVEL_NONE)
{
  debug_info_level = DINFO_LEVEL_NORMAL;

  if (write_symbols == NO_DEBUG)
write_symbols = PREFERRED_DEBUGGING_TYPE;
}
  else
debug_info_level = DINFO_LEVEL_NONE;
}

  if (!OPTION_SET_P (debug_nonbind_markers_p))
debug_nonbind_markers_p
  = (optimize
 && debug_info_level >= DINFO_LEVEL_NORMAL
 && dwarf_debuginfo_p ()
 && !(flag_selective_scheduling || flag_selective_scheduling2));

I don't see what possible fix you mean?

Martin


[PATCH] RISC-V: Fix register class subset checks for CLASS_MAX_NREGS

2021-11-02 Thread Maciej W. Rozycki
Fix the register class subset checks in the determination of the maximum 
number of consecutive registers needed to hold a value of a given mode.  

The number depends on whether a register is a general-purpose or a 
floating-point register, so check whether the register class requested 
is a subset (argument 1 to `reg_class_subset_p') rather than superset 
(argument 2) of GR_REGS or FP_REGS class respectively.

gcc/
* config/riscv/riscv.c (riscv_class_max_nregs): Swap the 
arguments to `reg_class_subset_p'.
---
Hi,

 This looks like a thinko to me, but please double-check I have not become 
confused myself.

 My understanding is that current code works, because the SIBCALL_REGS and 
JALR_REGS classes are only ever used for addresses, which won't ever span 
multiple registers, and then the only class other than ALL_REGS that 
includes floating-point registers is FP_REGS.  Therefore the hook will only 
ever see either GR_REGS or FP_REGS as the class enquired about and 
consequently the result of the `reg_class_subset_p' calls will be the same 
regardless of the order of the arguments chosen.  We should be getting the 
semantics right regardless.

 Regression-tested with the `riscv64-linux-gnu' target.  OK to apply?

  Maciej
---
 gcc/config/riscv/riscv.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

gcc-riscv-class-max-nregs.diff
Index: gcc/gcc/config/riscv/riscv.c
===
--- gcc.orig/gcc/config/riscv/riscv.c
+++ gcc/gcc/config/riscv/riscv.c
@@ -4811,10 +4811,10 @@ riscv_modes_tieable_p (machine_mode mode
 static unsigned char
 riscv_class_max_nregs (reg_class_t rclass, machine_mode mode)
 {
-  if (reg_class_subset_p (FP_REGS, rclass))
+  if (reg_class_subset_p (rclass, FP_REGS))
 return riscv_hard_regno_nregs (FP_REG_FIRST, mode);
 
-  if (reg_class_subset_p (GR_REGS, rclass))
+  if (reg_class_subset_p (rclass, GR_REGS))
 return riscv_hard_regno_nregs (GP_REG_FIRST, mode);
 
   return 0;


Re: [PATCH 0/5] Fortran manual updates

2021-11-02 Thread Martin Liška

On 11/2/21 15:48, Sandra Loosemore wrote:

On 11/2/21 2:51 AM, Martin Liška wrote:

On 11/2/21 00:56, Sandra Loosemore wrote:

I'll wait a couple days before committing these patches, in case
anybody wants to give some feedback, especially on technical issues.


Hello.

Appreciate the work you did, but the patchset will cause quite some conflicts
in the prepared Sphinx migration patch I've sent to the mailing list :/
Anyway, I will rebase my patches. For the future, are you planning doing similar
documentation reorganization for a manual? Based on discussion with Gerald, I 
hope
we can finish the transition before the end of this year.


My understanding was that, if this conversion is indeed going to happen, it's 
going to be automated by scripts?


Exactly, but the conversion needs some manual post-processing that I've already 
done.


  I hadn't seen any discussion of it on the list for months and thought the 
whole idea was on hold or scrapped, since it hasn't happened yet.


There was almost no response, so that's why I contacted Gerald about help.


In any case it does not seem reasonable to freeze the current Texinfo docs for 
months while waiting for it to happen, especially as we are heading into the 
end of the release cycle and people are finishing up changes and new features 
they need to document.


Sure, I can easily rebase normal changes, but you are suggesting a complete 
redesign/renaming. It's going to take me some time,
but I'll rebase my patches.

Thanks for understanding,
Martin



-Sandra




RE: [PATCH]middle-end Add an RPO pass after successful vectorization

2021-11-02 Thread Richard Biener via Gcc-patches
On Tue, 2 Nov 2021, Tamar Christina wrote:

> > -Original Message-
> > From: Richard Biener 
> > Sent: Tuesday, November 2, 2021 2:24 PM
> > To: Tamar Christina 
> > Cc: gcc-patches@gcc.gnu.org; nd 
> > Subject: Re: [PATCH]middle-end Add an RPO pass after successful
> > vectorization
> > 
> > On Tue, 2 Nov 2021, Tamar Christina wrote:
> > 
> > > Hi All,
> > >
> > > Following my current SVE predicate optimization series a problem has
> > > presented itself in that the way vector masks are generated for masked
> > > operations relies on CSE to share masks efficiently.
> > >
> > > The issue however is that masking is done using the & operand and & is
> > > associative and so reassoc decides to reassociate the masked operations.
> > 
> > But it does this for the purpose of canonicalization and thus CSE.
> 
> Yes, but it turns something like
> 
> (a & b) & mask into a & (b & mask).
> 
> When (a & b) is used somewhere else you now lose the CSE.  So it's actually 
> hurting
> In this case.

OK, so that's a known "issue" with reassoc, it doesn't consider global
CSE opportunities and I guess it pushes 'mask' to leaf if it is loop
carried.

> > 
> > > This makes CSE then unable to CSE an unmasked and a masked operation
> > > leading to duplicate operations being performed.
> > >
> > > To counter this we want to add an RPO pass over the vectorized loop
> > > body when vectorization succeeds.  This makes it then no longer
> > > reliant on the RTL level CSE.
> > >
> > > I have not added a testcase for this as it requires the changes in my
> > > patch series, however the entire series relies on this patch to work
> > > so all the tests there cover it.
> > >
> > > Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-linux-gnu and
> > > no issues.
> > >
> > > Ok for master?
> > 
> > You are running VN over _all_ loop bodies rather only those vectorized.
> > We loop over vectorized loops earlier for optimizing masked store sequences.
> > I suppose you could hook in there.  I'll also notice that we have
> > pass_pre_slp_scalar_cleanup which eventually runs plus we have a late FRE.
> > So I don't understand why it doesn't work to CSE later.
> > 
> 
> Atm, say you have the conditions a > b, and a > b & a > c
> 
> We generate
> 
> mask1 = (a > b) & loop_mask
> mask2 = (a > b & a > c) & loop_mask
> 
> with the intention that mask1 can be re-used in mask2.
> 
> Reassoc changes this to mask2 = a > b & (a > c & loop_mask)
> 
> Which has now unmasked (a > b) in mask2, which leaves us unable to combine
> the mask1 and mask2.  It doesn't generate incorrect code, just inefficient.
> 
> >   for (i = 1; i < number_of_loops (cfun); i++)
> > {
> >   loop_vec_info loop_vinfo;
> >   bool has_mask_store;
> > 
> >   loop = get_loop (cfun, i);
> >   if (!loop || !loop->aux)
> > continue;
> >   loop_vinfo = (loop_vec_info) loop->aux;
> >   has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
> >   delete loop_vinfo;
> >   if (has_mask_store
> >   && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
> > optimize_mask_stores (loop);
> >   loop->aux = NULL;
> > }
> > 
> 
> Ah thanks, I'll make the changes.

Note I think that full-blown CSE is a bit overkill just to counter
a deficient reassoc (or VN).  At least it is supposed to be "cheap"
and can be conditionalized on loop masks being used as well.

> Thanks,
> Tamar
> 
> > 
> > > Thanks,
> > > Tamar
> > >
> > > gcc/ChangeLog:
> > >
> > >   * tree-vectorizer.c (vectorize_loops): Do local CSE through RPVN
> > upon
> > >   successful vectorization.
> > >
> > > --- inline copy of patch --
> > > diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index
> > >
> > 4712dc6e7f907637774482a71036a0bd381c2bd2..1e370d60fb19b03c3b6bce45c
> > 660
> > > af4b6d32dc51 100644
> > > --- a/gcc/tree-vectorizer.c
> > > +++ b/gcc/tree-vectorizer.c
> > > @@ -81,7 +81,7 @@ along with GCC; see the file COPYING3.  If not see
> > > #include "gimple-pretty-print.h"
> > >  #include "opt-problem.h"
> > >  #include "internal-fn.h"
> > > -
> > > +#include "tree-ssa-sccvn.h"
> > >
> > >  /* Loop or bb location, with hotness information.  */
> > > dump_user_location_t vect_location; @@ -1323,6 +1323,27 @@
> > > vectorize_loops (void)
> > >???  Also while we try hard to update loop-closed SSA form we fail
> > >to properly do this in some corner-cases (see PR56286).  */
> > >rewrite_into_loop_closed_ssa (NULL,
> > > TODO_update_ssa_only_virtuals);
> > > +
> > > +  for (i = 1; i < number_of_loops (cfun); i++)
> > > + {
> > > +   loop = get_loop (cfun, i);
> > > +   if (!loop || !single_exit (loop))
> > > + continue;
> > > +
> > > +   bitmap exit_bbs;
> > > +   /* Perform local CSE, this esp. helps because we emit code for
> > > +  predicates that need to be shared for optimal predicate usage.
> > > +  However reassoc will re-order them and prevent CSE from working
> > > +  as it should.  CSE only th

Re: Building GNU Arm Embedded Toolchain for macOS/arm64

2021-11-02 Thread Iain Sandoe
Hi Romain

> On 2 Nov 2021, at 13:09, Romain Goyet via Gcc-patches 
>  wrote:

> Arm distribute pre-built versions of GCC that targets bare-metal Cortex-M
> devices at
> https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-rm

> I have written a few small patches to get this toolchain to build and run
> on macOS/arm64. Should I submit them somewhere?

The work to implement Arm64 on macOS is on-going (prototype is here:
https://github.com/iains/gcc-darwin-arm64)

The patches for host-side support are mostly ready (probably your patches will 
be
quite similar to those on the prototype, but if there’s something extra we’ve 
missed so far
please feel free to post them to this list - please cc me).

I am hoping to get the host-side stuff into master first (and reasonably soon).

thanks
Iain

main host-side support:

https://github.com/iains/gcc-darwin-arm64/commit/af097efc24a72c005756b05f65f0f450e41340ed
+ next four patches (deals with the fact that Arm64-darwin cannot use PCH).

driver:
https://github.com/iains/gcc-darwin-arm64/commit/5757cced1e1c8d4f0ec5458b9af7154d694e400b

ada host-side tools.
https://github.com/iains/gcc-darwin-arm64/commit/c16becf974da73646eb7b5e356323ffa4a585b21



[RFA] Minor optimization of variable bit testing

2021-11-02 Thread Jeff Law


I was wandering through spec, chasing down instances where we should be 
generating bit-test, bit-set and bit-clear types of instructions for our 
target, when I ran across a generic missed optimization in this space.



(((1 << N) & C) != 0)  -> (N == C')
(((1 << N) & C) == 0)  -> (N != C')

Where C is a constant power of 2 and C' is log2 (C).
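
For a concrete (hypothetical) instance with C = 0x800, so C' = 11:

  /* Before: variable shift plus bit mask.  */
  int f (unsigned int n) { return ((1u << n) & 0x800u) != 0; }

  /* With the new match.pd rules this effectively becomes:  */
  int g (unsigned int n) { return n == 11; }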



That obviously avoids the shift by a variable amount and the bit masking 
which is the primary effect.  I did see cases where we were able to 
constant propagate into uses of N, but those were only in PHI nodes and 
never triggered any real secondary effects in the cases I looked at.



Anyway, it's a fairly minor optimization, but with the analysis done and 
patch in hand, it's silly not to take the easy win.



Bootstrapped and regression tested on x86_64 and verified that the 
affected spec benchmark (gcc itself) still passes on our target.


OK for the trunk?  Note I added the patterns at the end of match.pd.  
Certainly open to moving them elsewhere.



Jeff
diff --git a/gcc/match.pd b/gcc/match.pd
index 4fbba3922e5..b275631555d 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6835,3 +6835,20 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
to the number of trailing zeroes.  */
 (match (ctz_table_index @1 @2 @3)
   (rshift (mult (bit_and:c (negate @1) @1) INTEGER_CST@2) INTEGER_CST@3))
+
+/* ((1 << n) & M) != 0  -> n == log2 (M) */
+(simplify
+ (ne
+  (bit_and
+   (nop_convert? (lshift integer_onep@0 @1)) integer_pow2p@2) integer_zerop@3)
+ (eq @1 { build_int_cst (integer_type_node,
+ wi::exact_log2 (wi::to_wide (@2))); }))
+
+/* ((1 << n) & M) == 0  -> n != log2 (M) */
+(simplify
+ (eq
+  (bit_and
+   (nop_convert? (lshift integer_onep@0 @1)) integer_pow2p@2) integer_zerop@3)
+ (ne @1 { build_int_cst (integer_type_node,
+ wi::exact_log2 (wi::to_wide (@2))); }))
+
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bittest.c 
b/gcc/testsuite/gcc.dg/tree-ssa/bittest.c
new file mode 100644
index 000..7d712cad1ee
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bittest.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+
+void bar (void);
+
+void
+foo(unsigned int abc123)
+{
+  unsigned int xyzpdq = (1 << abc123);
+  if ((xyzpdq & 0x800) != 0)
+bar();
+}
+
+void
+baz(unsigned int abc123)
+{
+  unsigned int xyzpdq = (1 << abc123);
+  if ((xyzpdq & 0x800) == 0)
+bar();
+}
+
+/* What we want to verify is that the bit test against xyzpdq is
+   replaced with a test against abc123 which avoids the shifting
+   and bit ops.  */
+/* { dg-final { scan-tree-dump-not "xyzpdq" "optimized"} } */
+/* { dg-final { scan-tree-dump-times "if .abc123" 2 "optimized"} } */


Re: [PATCH 0/5] Fortran manual updates

2021-11-02 Thread Sandra Loosemore

On 11/2/21 9:20 AM, Martin Liška wrote:

On 11/2/21 15:48, Sandra Loosemore wrote:

On 11/2/21 2:51 AM, Martin Liška wrote:

On 11/2/21 00:56, Sandra Loosemore wrote:

I'll wait a couple days before committing these patches, in case
anybody wants to give some feedback, especially on technical issues.


Hello.

Appreciate the work you did, but the patchset will cause quite some 
conflicts

in the prepared Sphinx migration patch I've sent to the mailing list :/
Anyway, I will rebase my patches. For the future, are you planning 
doing similar
documentation reorganization for a manual? Based on discussion with 
Gerald, I hope

we can finish the transition before the end of this year.


My understanding was that, if this conversion is indeed going to 
happen, it's going to be automated by scripts?


Exactly, but the conversion needs some manual post-processing that I've 
already done.


  I hadn't seen any discussion of it on the list for months and 
thought the whole idea was on hold or scrapped, since it hasn't 
happened yet.


There was almost no response, so that's why I contacted Gerald about help.


I have to admit that I was buried in technical work at the time of the 
previous discussion (in fact, the Fortran things I am now trying to 
document), and didn't have time to look at the proposed changes in any 
detail.  I have wondered, though, why it's necessary to do this 
change: if people don't like the way Texinfo formats output, can't 
we fix Texinfo?  Or hack it to translate the sources to something like 
DocBook instead, and then adopt that as our source format?  I can write 
documentation in any markup format, but it seems to me that structured 
XML-based formats are a lot more amenable to scripted manipulation than 
either Texinfo or restructured text.  If the rest of the community is 
set on Sphinx, I'm fine with that, but I kind of don't see the point, 
myself.  :-S


In any case it does not seem reasonable to freeze the current Texinfo 
docs for months while waiting for it to happen, especially as we are 
heading into the end of the release cycle and people are finishing up 
changes and new features they need to document.


Sure, I can easily rebase normal changes, but you are suggesting a 
complete redesign/renaming. It's going to take me some time,

but I'll rebase my patches.


Well, what I've done is hardly a "complete" redesign/renaming of the 
Fortran manual -- I've barely scratched the surface on it.  My main goal 
was just to update the bit-rotten standards conformance sections, which 
were unfortunately spread among multiple places in the document.  I did 
consolidate those few sections, but I did not make any big-picture 
changes to the organization of the manual, and I have not even reviewed 
any other parts of it for accuracy or relevance.  I'd been thinking 
about making a pass to do some copy-editing things, like making sure all 
chapter/section titles use consistent title case capitalization, but I 
will hold off on that if it's going to cause problems.


-Sandra


RE: [PATCH 3/5]AArch64 sve: do not keep negated mask and inverse mask live at the same time

2021-11-02 Thread Tamar Christina via Gcc-patches
Hi,

Here's a respin of the patch which I think encompasses the method you expected.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

* tree-vect-stmts.c (vectorizable_condition): Check if inverse of mask
is live.
* tree-vectorizer.c (scalar_cond_masked_key::get_cond_ops_from_tree):
Register mask inverses.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sve/pred-not-gen-1.c: Update testcase.
* gcc.target/aarch64/sve/pred-not-gen-2.c: Update testcase.
* gcc.target/aarch64/sve/pred-not-gen-3.c: Update testcase.
* gcc.target/aarch64/sve/pred-not-gen-4.c: Update testcase.

--- inline copy of patch ---

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c
index 
2c06564186c5a5e7917da475a9c201c81dfeb136..7fac35ea9387818aaa0a12ef66d02313013203af
 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c
@@ -1,4 +1,4 @@
-/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-do compile } */
 /* { dg-options "-O3 --save-temps" } */
 
 /*
@@ -21,3 +21,4 @@ void f10(double * restrict z, double * restrict w, double * 
restrict x, double *
 
 /* { dg-final { scan-assembler-not {\tbic\t} } } */
 /* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, 
p[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-9]+/z, 
z[0-9]+\.d, #0} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-2.c
index 
0c3b78d4c67455c971e94fb2ffdd7be2d4884864..d73f7cbeb6be7ad2cc54e601d6c4fbd4d98fa675
 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-2.c
@@ -1,4 +1,4 @@
-/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-do compile } */
 /* { dg-options "-O3 --save-temps" } */
 
 /*
@@ -21,3 +21,4 @@ void f11(double * restrict z, double * restrict w, double * 
restrict x, double *
 
 /* { dg-final { scan-assembler-not {\tbic\t} } } */
 /* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, 
p[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-9]+/z, 
z[0-9]+\.d, #0.0} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-3.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-3.c
index 
248f8ab57191ce8a1d4c334533de8bc76aa07691..1240d7cb86d00221b546d81f128d64d22d347885
 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-3.c
@@ -1,4 +1,4 @@
-/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-do compile } */
 /* { dg-options "-O3 --save-temps" } */
 
 /*
@@ -19,3 +19,4 @@ void f12(int * restrict z, int * restrict w, int * restrict 
x, int * restrict y,
 
 /* { dg-final { scan-assembler-not {\tbic\t} } } */
 /* { dg-final { scan-assembler-not {\tnot\tp[0-9]+\.b, p[0-9]+/z, 
p[0-9]+\.b\n} } } */
+/* { dg-final { scan-assembler-times {\tcmple\tp[0-9]+\.s, p[0-9]+/z, 
z[0-9]+\.s, #0} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c
index 
96200309880a91ad1db5801115c911cfdce06125..edda9c115900ca62268425f1616d975f6a7b7721
 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c
@@ -1,4 +1,4 @@
-/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-do compile } */
 /* { dg-options "-O3 --save-temps" } */
 
 #include 
@@ -12,3 +12,4 @@ void f13(double * restrict z, double * restrict w, double * 
restrict x, double *
 
 /* { dg-final { scan-assembler-not {\tbic\t} } } */
 /* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, 
p[0-9]+\.b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-9]+/z, 
z[0-9]+\.d, z[0-9]+\.d} 1 } } */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 
27ee48aea429810a3d907435a92b8fd1817d..23f7bed26626a872c165cd2654bb4391a847bd7e
 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -10375,6 +10375,7 @@ vectorizable_condition (vec_info *vinfo,
  else
{
  bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
+ tree_code orig_code = cond.code;
  cond.code = invert_tree_comparison (cond.code, honor_nans);
  if (loop_vinfo->scalar_cond_masked_set.contains (cond))
{
@@ -10382,6 +10383,22 @@ vectorizable_condition (vec_info *vinfo,
  cond_code = cond.code;
  swap_cond_operands = true;
}
+ else
+   {
+ /* Try the inverse of the current mask.  We check if the
+inverse mask is live and if so we generate

[committed v2] RISC-V: Fix build errors with shNadd/shNadd.uw patterns in zba cost model

2021-11-02 Thread Maciej W. Rozycki
Fix a build regression from commit 04a9b554ba1a ("RISC-V: Cost model 
for zba extension."):

.../gcc/config/riscv/riscv.c: In function 'bool riscv_rtx_costs(rtx, 
machine_mode, int, int, int*, bool)':
.../gcc/config/riscv/riscv.c:2018:11: error: 'and' of mutually exclusive 
equal-tests is always 0 [-Werror]
 2018 |   && IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 0)), 1, 3))
  |   ^~
.../gcc/config/riscv/riscv.c:2047:17: error: unused variable 'ashift_lhs' 
[-Werror=unused-variable]
 2047 | rtx ashift_lhs = XEXP (and_lhs, 0);
  | ^~


by correcting a CONST_INT_P check referring to the wrong operand and 
getting rid of the unused variable.

gcc/
* config/riscv/riscv.c (riscv_rtx_costs): Correct a CONST_INT_P 
check and remove an unused local variable with shNadd/shNadd.uw 
pattern handling.
---
Hi Kito,

> I think that's my mistake...it should fix following check rather than
> remove the REG_P like that:
> 
> @@ -2014,8 +2014,8 @@ riscv_rtx_costs (rtx x, machine_mode mode, int
> outer_code, int opno ATTRIBUTE_UN
>  (TARGET_64BIT && (mode == DImode)))
>  && (GET_CODE (XEXP (x, 0)) == ASHIFT)
>  && REG_P (XEXP (XEXP (x, 0), 0))
> - && CONST_INT_P (XEXP (XEXP (x, 0), 0))
> - && IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 0)), 1, 3))
> + && CONST_INT_P (XEXP (XEXP (x, 0), 1))
> + && IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1)), 1, 3))
>{
>  *total = COSTS_N_INSNS (1);
>  return true;
> 
> 
> shNadd pattern:
> 
> (define_insn "*shNadd"
>  [(set (match_operand:X 0 "register_operand" "=r")
>(plus:X (ashift:X (match_operand:X 1 "register_operand" "r")
>  # What I want to check is here, it should be
> XEXP (XEXP (x, 0), 1)
>  (match_operand:QI 2 "immediate_operand" "I"))
>(match_operand:X 3 "register_operand" "r")))]

 Right, I should have cross-checked with the machine description.

 Also are we missing explicit test coverage here?  Or is it supposed to
be covered by the generic tests here or there already (I'm not familiar 
with the details of the ISA extension to tell offhand), as long as the 
extension has been enabled for the target tested, and it is just that 
the problem has slipped through somehow?

> Otherwise LGTM, feel free to commit once you address this issue.

 Rebuilt for verification and committed as shown.  Thank you for your 
review.

  Maciej
---
 gcc/config/riscv/riscv.c |5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

gcc-riscv-rtx-costs-zba-shnadd.diff
Index: gcc/gcc/config/riscv/riscv.c
===
--- gcc.orig/gcc/config/riscv/riscv.c
+++ gcc/gcc/config/riscv/riscv.c
@@ -2014,8 +2014,8 @@ riscv_rtx_costs (rtx x, machine_mode mod
  (TARGET_64BIT && (mode == DImode)))
  && (GET_CODE (XEXP (x, 0)) == ASHIFT)
  && REG_P (XEXP (XEXP (x, 0), 0))
- && CONST_INT_P (XEXP (XEXP (x, 0), 0))
- && IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 0)), 1, 3))
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+ && IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1)), 1, 3))
{
  *total = COSTS_N_INSNS (1);
  return true;
@@ -2044,7 +2044,6 @@ riscv_rtx_costs (rtx x, machine_mode mod
if (!CONST_INT_P (and_rhs))
  break;
 
-   rtx ashift_lhs = XEXP (and_lhs, 0);
rtx ashift_rhs = XEXP (and_lhs, 1);
 
if (!CONST_INT_P (ashift_rhs)


Re: [PATCH] Add -fopt-builtin optimization option

2021-11-02 Thread Keith Packard via Gcc-patches
Richard Biener  writes:

> I don't think it reliably works the way you implement it.  It's also having
> more side-effects than what you document, in particular

Yeah, I made a 'minimal' patch that had the effect I needed, but it's
clearly in the wrong place as it disables the matching of builtins
against the incoming source code instead of the generation of new
builtin references from the tree.

> I think you may want to instead change builtin_decl_implicit
> to avoid code-generating a specific builtin.

Yup, I looked at that and there are numerous places which assume that
will work, so it will be a more complicated patch.

> Generally we'd also want sth like the clang attribute and _not_
> use optimize("") for this or a global flag_*, so the behavior can
> be more readily encoded in the IL.  In fact a flag on the call
> statement could be added to denote the desired effect on it.

Agreed, using the existing optimize attribute was a short-cut to
leverage the existing code handling that case. If we think providing
something that matches the clang attribute would be useful, it makes
sense to provide it using the same syntax.

> I also don't see the advantage compared to -fno-builtin[-foo].
> Declaring the function should be something that's already done.

The semantics of the clang option are not to completely disable access
to the given builtin function, but rather to stop the optimizer from
creating new builtin function references (either to a specific builtin,
or to all builtins).

If I could use "no-builtin" in a function attribute, I probably wouldn't
have bothered looking to implement the clang semantics, but -fno-builtin
isn't supported in this way. But, now that I think I understand the
behavior of attribute((no_builtin)) in clang, I think it has value
beyond what -fno-builtin provides, as you can still gain access to
builtin functions when they are directly named.
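
As a concrete sketch of those semantics (using clang's spelling of the
attribute; the GCC equivalent is what is being discussed here, so the exact
syntax is an assumption):

  #include <stddef.h>

  /* The optimizer must not turn this loop into a memcpy call, yet an
     explicit call to __builtin_memcpy elsewhere would still be honored.  */
  __attribute__((no_builtin("memcpy")))
  void copy_bytes (char *dst, const char *src, size_t n)
  {
    for (size_t i = 0; i < n; i++)
      dst[i] = src[i];
  }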

I'll go implement changes in builtin_decl_implicit and all of the
affected call sites and see what that looks like.

Thanks much for your review!

-- 
-keith




Re: [PATCH] [RFC][PR102768] aarch64: Add compiler support for Shadow Call Stack

2021-11-02 Thread Dan Li via Gcc-patches




On 11/2/21 9:04 PM, Szabolcs Nagy wrote:

The 11/02/2021 00:06, Dan Li via Gcc-patches wrote:

Shadow Call Stack can be used to protect the return address of a
function at runtime, and clang already supports this feature[1].

To enable SCS in user mode, in addition to compiler, other support
is also required (as described in [2]). This patch only adds basic
support for SCS from the compiler side, and provides convenience
for users to enable SCS.

For linux kernel, only the support of the compiler is required.

[1] https://clang.llvm.org/docs/ShadowCallStack.html
[2] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102768


i'm not a gcc maintainer, but i prefer such feature
to be in upstream gcc instead of in a plugin.

it will require update to the documentation:

which should mention that it depends on -ffixed-x18
(probably that should be enforced too) which is an
important abi issue: functions following the normal
pcs can clobber x18 and break scs.


Thanks Szabolcs, I will update the documentation in next version.

It sounds reasonable to enforce -ffixed-x18 with scs, but I see
that clang doesn’t do that. Maybe it is better to be consistent
with clang here?
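
For reference, a compile-only sketch of the kind of test this could grow
(hypothetical: the option spelling and the expected x18-based save/restore
sequences are assumptions, not taken from this patch):

  /* { dg-do compile } */
  /* { dg-options "-O2 -fsanitize=shadow-call-stack -ffixed-x18" } */
  int bar (int);
  int foo (int x) { return bar (x) + 1; }
  /* Return address pushed to the shadow stack in the prologue ...  */
  /* { dg-final { scan-assembler {str\tx30, \[x18\], 8} } } */
  /* ... and popped again in the epilogue.  */
  /* { dg-final { scan-assembler {ldr\tx30, \[x18, -8\]!} } } */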


and that there is no unwinder support.


Ok, let me try to add support for this.


the abi issue means it is unlikely to be useful in
linux user space (even if libc and unwinder support
is implemented), but it can be still useful in
freestanding code such as the linux kernel.

thanks.



Re: [PATCH] [gfortran] Add support for allocate clause (OpenMP 5.0).

2021-11-02 Thread Jakub Jelinek via Gcc-patches
On Fri, Oct 22, 2021 at 02:05:02PM +0100, Hafiz Abid Qadeer wrote:
> This patch adds support for OpenMP 5.0 allocate clause for fortran. It does 
> not
> yet support the allocator-modifier as specified in OpenMP 5.1. The allocate
> clause is already supported in C/C++.
> 
> gcc/fortran/ChangeLog:
> 
>   * dump-parse-tree.c (show_omp_clauses): Handle OMP_LIST_ALLOCATE.
>   * gfortran.h (OMP_LIST_ALLOCATE): New enum value.
>   (allocate): New member in gfc_symbol.
>   * openmp.c (enum omp_mask1): Add OMP_CLAUSE_ALLOCATE.
>   (gfc_match_omp_clauses): Handle OMP_CLAUSE_ALLOCATE

Missing . at the end.

>   (OMP_PARALLEL_CLAUSES, OMP_DO_CLAUSES, OMP_SECTIONS_CLAUSES)
>   (OMP_TASK_CLAUSES, OMP_TASKLOOP_CLAUSES, OMP_TARGET_CLAUSES)
>   (OMP_TEAMS_CLAUSES, OMP_DISTRIBUTE_CLAUSES)
>   (OMP_SINGLE_CLAUSES): Add OMP_CLAUSE_ALLOCATE.
>   (OMP_TASKGROUP_CLAUSES): New

Likewise.

>   (gfc_match_omp_taskgroup): Use 'OMP_TASKGROUP_CLAUSES' instead of
>   'OMP_CLAUSE_TASK_REDUCTION'

Likewise.  Please also drop the ' characters.

> @@ -1880,6 +1881,10 @@ typedef struct gfc_symbol
>   according to the Fortran standard.  */
>unsigned pass_as_value:1;
>  
> +  /* Used to check if a variable used in allocate clause has also been
> + used in privatization clause.  */
> +  unsigned allocate:1;

I think it would be desirable to use omp_allocate here instead
of allocate and mention OpenMP in the comment too.
Fortran has allocate statement in the language, so not pointing to
OpenMP would only cause confusion.

> @@ -1540,6 +1541,40 @@ gfc_match_omp_clauses (gfc_omp_clauses **cp, const 
> omp_mask mask,
>   }
> continue;
>   }
> +   if ((mask & OMP_CLAUSE_ALLOCATE)
> +   && gfc_match ("allocate ( ") == MATCH_YES)
> + {
> +   gfc_expr *allocator = NULL;
> +   old_loc = gfc_current_locus;
> +   m = gfc_match_expr (&allocator);
> +   if (m != MATCH_YES)
> + {
> +   gfc_error ("Expected allocator or variable list at %C");
> +   goto error;
> + }
> +   if (gfc_match (" : ") != MATCH_YES)
> + {
> +   /* If no ":" then there is no allocator, we backtrack
> +  and read the variable list.  */
> +   allocator = NULL;

Isn't this a memory leak?  I believe Fortran FE expressions are not GC
allocated...
So, shouldn't there be gfc_free_expr or something similar before clearing it?

> +  /* Check for 2 things here.
> + 1.  There is no duplication of variable in allocate clause.
> + 2.  Variable in allocate clause are also present in some
> +  privatization clase.  */
> +  for (n = omp_clauses->lists[OMP_LIST_ALLOCATE]; n; n = n->next)
> +n->sym->allocate = 0;
> +
> +  gfc_omp_namelist *prev = NULL;
> +  for (n = omp_clauses->lists[OMP_LIST_ALLOCATE]; n;)
> +{
> +  if (n->sym->allocate == 1)
> + {
> +   gfc_warning (0, "%qs appears more than once in %<allocate%> "
> +   "clauses at %L" , n->sym->name, &n->where);
> +   /* We have already seen this variable so it is a duplicate.
> +  Remove it.  */
> +   if (prev != NULL && prev->next == n)
> + {
> +   prev->next = n->next;
> +   n->next = NULL;
> +   gfc_free_omp_namelist (n, 0);
> +   n = prev->next;
> + }
> +
> +   continue;
> + }
> +  n->sym->allocate = 1;
> +  prev = n;
> +  n = n->next;
> +}
> +
> +  for (list = 0; list < OMP_LIST_NUM; list++)
> +switch (list)
> +  {
> +  case OMP_LIST_PRIVATE:
> +  case OMP_LIST_FIRSTPRIVATE:
> +  case OMP_LIST_LASTPRIVATE:
> +  case OMP_LIST_REDUCTION:
> +  case OMP_LIST_REDUCTION_INSCAN:
> +  case OMP_LIST_REDUCTION_TASK:
> +  case OMP_LIST_IN_REDUCTION:
> +  case OMP_LIST_TASK_REDUCTION:
> +  case OMP_LIST_LINEAR:
> + for (n = omp_clauses->lists[list]; n; n = n->next)
> +   n->sym->allocate = 0;
> + break;
> +  default:
> + break;
> +  }
> +
> +  for (n = omp_clauses->lists[OMP_LIST_ALLOCATE]; n; n = n->next)
> +if (n->sym->allocate == 1)
> +  gfc_error ("%qs specified in 'allocate' clause at %L but not in an "
> +  "explicit privatization clause", n->sym->name, &n->where);

I'm not sure this is what the standard says, certainly C/C++ FE do this
quite differently for combined/composite constructs.
In particular, we first split the clauses to the individual leaf constructs
in c_omp_split_clauses, which for allocate clause is even more complicated
because as clarified in 5.2:
"The effect of the allocate clause is as if it is applied to all leaf 
constructs that permit the clause
and to which a data-sharing attribute clause that may create a private copy of 
the same list item is
applied."
so there is the has_dup_allocate stuff, we first duplicate it to all leaf
constructs that allow the allocate cl

Re: [committed v2] RISC-V: Fix build errors with shNadd/shNadd.uw patterns in zba cost model

2021-11-02 Thread Kito Cheng via Gcc-patches
On Wed, Nov 3, 2021 at 12:07 AM Maciej W. Rozycki  wrote:
>
> Fix a build regression from commit 04a9b554ba1a ("RISC-V: Cost model
> for zba extension."):
>
> .../gcc/config/riscv/riscv.c: In function 'bool riscv_rtx_costs(rtx, 
> machine_mode, int, int, int*, bool)':
> .../gcc/config/riscv/riscv.c:2018:11: error: 'and' of mutually exclusive 
> equal-tests is always 0 [-Werror]
>  2018 |   && IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 0)), 1, 3))
>   |   ^~
> .../gcc/config/riscv/riscv.c:2047:17: error: unused variable 'ashift_lhs' 
> [-Werror=unused-variable]
>  2047 | rtx ashift_lhs = XEXP (and_lhs, 0);
>   | ^~
>
>
> by correcting a CONST_INT_P check referring to the wrong operand and
> getting rid of the unused variable.
>
> gcc/
> * config/riscv/riscv.c (riscv_rtx_costs): Correct a CONST_INT_P
> check and remove an unused local variable with shNadd/shNadd.uw
> pattern handling.
> ---
> Hi Kito,
>
> > I think that's my mistake...it should fix following check rather than
> > remove the REG_P like that:
> >
> > @@ -2014,8 +2014,8 @@ riscv_rtx_costs (rtx x, machine_mode mode, int
> > outer_code, int opno ATTRIBUTE_UN
> >  (TARGET_64BIT && (mode == DImode)))
> >  && (GET_CODE (XEXP (x, 0)) == ASHIFT)
> >  && REG_P (XEXP (XEXP (x, 0), 0))
> > - && CONST_INT_P (XEXP (XEXP (x, 0), 0))
> > - && IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 0)), 1, 3))
> > + && CONST_INT_P (XEXP (XEXP (x, 0), 1))
> > + && IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1)), 1, 3))
> >{
> >  *total = COSTS_N_INSNS (1);
> >  return true;
> >
> >
> > shNadd pattern:
> >
> > (define_insn "*shNadd"
> >  [(set (match_operand:X 0 "register_operand" "=r")
> >(plus:X (ashift:X (match_operand:X 1 "register_operand" "r")
> >  # What I want to check is here, it should be
> > XEXP (XEXP (x, 0), 1)
> >  (match_operand:QI 2 "immediate_operand" "I"))
> >(match_operand:X 3 "register_operand" "r")))]
>
>  Right, I should have cross-checked with the machine description.
>
>  Also are we missing explicit test coverage here?  Or is it supposed to
> be covered by the generic tests here or there already (I'm not familiar
> with the details of the ISA extension to tell offhand), as long as the
> extension has been enabled for the target tested, and it is just that
> the problem has slipped through somehow?

The cost model is not easy to test (at least for me :p); I usually verify
it by checking the dump of the combine pass to make sure the cost is right.
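
For example, a compile-only sketch along these lines (hypothetical, not part
of this fix) would at least catch the scaled-index form no longer being
combined into a single instruction:

  /* { dg-do compile } */
  /* { dg-options "-march=rv64gc_zba -mabi=lp64 -O2" } */
  long f (long a, long b) { return a + (b << 2); }
  /* { dg-final { scan-assembler "sh2add" } } */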

> > Otherwise LGTM, feel free to commit once you address this issue.
>
>  Rebuilt for verification and committed as shown.  Thank you for your
> review.

Thanks!

>
>   Maciej
> ---
>  gcc/config/riscv/riscv.c |5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
>
> gcc-riscv-rtx-costs-zba-shnadd.diff
> Index: gcc/gcc/config/riscv/riscv.c
> ===
> --- gcc.orig/gcc/config/riscv/riscv.c
> +++ gcc/gcc/config/riscv/riscv.c
> @@ -2014,8 +2014,8 @@ riscv_rtx_costs (rtx x, machine_mode mod
>   (TARGET_64BIT && (mode == DImode)))
>   && (GET_CODE (XEXP (x, 0)) == ASHIFT)
>   && REG_P (XEXP (XEXP (x, 0), 0))
> - && CONST_INT_P (XEXP (XEXP (x, 0), 0))
> - && IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 0)), 1, 3))
> + && CONST_INT_P (XEXP (XEXP (x, 0), 1))
> + && IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1)), 1, 3))
> {
>   *total = COSTS_N_INSNS (1);
>   return true;
> @@ -2044,7 +2044,6 @@ riscv_rtx_costs (rtx x, machine_mode mod
> if (!CONST_INT_P (and_rhs))
>   break;
>
> -   rtx ashift_lhs = XEXP (and_lhs, 0);
> rtx ashift_rhs = XEXP (and_lhs, 1);
>
> if (!CONST_INT_P (ashift_rhs)


Re: [PATCH] Record that -gtoggle is already used in gcc_options.

2021-11-02 Thread Richard Biener via Gcc-patches
On Tue, Nov 2, 2021 at 4:11 PM Martin Liška  wrote:
>
> On 11/2/21 15:33, Richard Biener wrote:
> > I think -gtoggle matches a Defered option and thus should be processed
> > in handle_common_deferred_options.
>
> Well, that's quite problematic as I handle_common_deferred_options is called
> after decode_options (that calls finish_options).
>
> Note there's direct dependency at very end of finish_options in between 
> -gtoggle
> and debug_nonbind_markers_p:
>
>
>if (flag_gtoggle)
>  {
>if (debug_info_level == DINFO_LEVEL_NONE)
> {
>   debug_info_level = DINFO_LEVEL_NORMAL;
>
>   if (write_symbols == NO_DEBUG)
> write_symbols = PREFERRED_DEBUGGING_TYPE;
> }
>else
> debug_info_level = DINFO_LEVEL_NONE;
>  }
>
>if (!OPTION_SET_P (debug_nonbind_markers_p))
>  debug_nonbind_markers_p
>= (optimize
>  && debug_info_level >= DINFO_LEVEL_NORMAL
>  && dwarf_debuginfo_p ()
>  && !(flag_selective_scheduling || flag_selective_scheduling2));
>
> I don't see who you mean the possible fix?

So at first I thought we might have a place that post-processes
'decoded_options' so we could reflect -gtoggle on those but
out-of-order (removing/adding -g).  But that's going to be mightily
complicated as well.

I wonder what the original issue is that you fix?  You say we apply
it for a second time but we should apply it onto the same
state as previously since we restore that for optimize attribute
processing?

Richard.

>
> Martin


Re: [PATCH] RISC-V: Fix register class subset checks for CLASS_MAX_NREGS

2021-11-02 Thread Kito Cheng via Gcc-patches
Hi Maciej:

LGTM.  My first impression of this patch was also confusion about the
ordering of arguments for reg_class_subset_p, but after I double-checked
that with gdb and read the comment above reg_class_subset_p, I think this
change is right.

Thanks!


On Tue, Nov 2, 2021 at 11:17 PM Maciej W. Rozycki  wrote:
>
> Fix the register class subset checks in the determination of the maximum
> number of consecutive registers needed to hold a value of a given mode.
>
> The number depends on whether a register is a general-purpose or a
> floating-point register, so check whether the register class requested
> is a subset (argument 1 to `reg_class_subset_p') rather than superset
> (argument 2) of GR_REGS or FP_REGS class respectively.
>
> gcc/
> * config/riscv/riscv.c (riscv_class_max_nregs): Swap the
> arguments to `reg_class_subset_p'.
> ---
> Hi,
>
>  This looks like a thinko to me, but please double-check I have not become
> confused myself.
>
>  My understanding is that current code works, because the SIBCALL_REGS and
> JALR_REGS classes are only ever used for addresses, which won't ever span
> multiple registers, and then the only class other than ALL_REGS that
> includes floating-point registers is FP_REGS.  Therefore the hook will only
> ever see either GR_REGS or FP_REGS as the class enquired about and
> consequently the result of the `reg_class_subset_p' calls will be the same
> regardless of the order of the arguments chosen.  We should be getting the
> semantics right regardless.
>
>  Regression-tested with the `riscv64-linux-gnu' target.  OK to apply?
>
>   Maciej
> ---
>  gcc/config/riscv/riscv.c |4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> gcc-riscv-class-max-nregs.diff
> Index: gcc/gcc/config/riscv/riscv.c
> ===
> --- gcc.orig/gcc/config/riscv/riscv.c
> +++ gcc/gcc/config/riscv/riscv.c
> @@ -4811,10 +4811,10 @@ riscv_modes_tieable_p (machine_mode mode
>  static unsigned char
>  riscv_class_max_nregs (reg_class_t rclass, machine_mode mode)
>  {
> -  if (reg_class_subset_p (FP_REGS, rclass))
> +  if (reg_class_subset_p (rclass, FP_REGS))
>  return riscv_hard_regno_nregs (FP_REG_FIRST, mode);
>
> -  if (reg_class_subset_p (GR_REGS, rclass))
> +  if (reg_class_subset_p (rclass, GR_REGS))
>  return riscv_hard_regno_nregs (GP_REG_FIRST, mode);
>
>return 0;


Re: [PATCH]middle-end Add an RPO pass after successful vectorization

2021-11-02 Thread Richard Sandiford via Gcc-patches
Richard Biener via Gcc-patches  writes:
> On Tue, 2 Nov 2021, Tamar Christina wrote:
>
>> > -Original Message-
>> > From: Richard Biener 
>> > Sent: Tuesday, November 2, 2021 2:24 PM
>> > To: Tamar Christina 
>> > Cc: gcc-patches@gcc.gnu.org; nd 
>> > Subject: Re: [PATCH]middle-end Add an RPO pass after successful
>> > vectorization
>> > 
>> > On Tue, 2 Nov 2021, Tamar Christina wrote:
>> > 
>> > > Hi All,
>> > >
>> > > Following my current SVE predicate optimization series a problem has
>> > > presented itself in that the way vector masks are generated for masked
>> > > operations relies on CSE to share masks efficiently.
>> > >
>> > > The issue however is that masking is done using the & operand and & is
>> > > associative and so reassoc decides to reassociate the masked operations.
>> > 
>> > But it does this for the purpose of canonicalization and thus CSE.
>> 
>> Yes, but it turns something like
>> 
>> (a & b) & mask into a & (b & mask).
>> 
>> When (a & b) is used somewhere else you now lose the CSE.  So it's actually
>> hurting in this case.
>
> OK, so that's a known "issue" with reassoc, it doesn't consider global
> CSE opportunities and I guess it pushes 'mask' to leaf if it is loop
> carried.
>
>> > 
>> > > This makes CSE then unable to CSE an unmasked and a masked operation
>> > > leading to duplicate operations being performed.
>> > >
>> > > To counter this we want to add an RPO pass over the vectorized loop
>> > > body when vectorization succeeds.  This makes it then no longer
>> > > reliant on the RTL level CSE.
>> > >
>> > > I have not added a testcase for this as it requires the changes in my
>> > > patch series, however the entire series relies on this patch to work
>> > > so all the tests there cover it.
>> > >
>> > > Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-linux-gnu and
>> > > no issues.
>> > >
>> > > Ok for master?
>> > 
>> > You are running VN over _all_ loop bodies rather only those vectorized.
>> > We loop over vectorized loops earlier for optimizing masked store 
>> > sequences.
>> > I suppose you could hook in there.  I'll also notice that we have
>> > pass_pre_slp_scalar_cleanup which eventually runs plus we have a late FRE.
>> > So I don't understand why it doesn't work to CSE later.
>> > 
>> 
>> Atm, say you have the conditions a > b, and a > b & a > c
>> 
>> We generate
>> 
>> mask1 = (a > b) & loop_mask
>> mask2 = (a > b & a > c) & loop_mask
>> 
>> with the intention that mask1 can be re-used in mask2.
>> 
>> Reassoc changes this to mask2 = a > b & (a > c & loop_mask)
>> 
>> Which has now unmasked (a > b) in mask2, which leaves us unable to combine
>> the mask1 and mask2.  It doesn't generate incorrect code, just inefficient.
>> 
>> >   for (i = 1; i < number_of_loops (cfun); i++)
>> > {
>> >   loop_vec_info loop_vinfo;
>> >   bool has_mask_store;
>> > 
>> >   loop = get_loop (cfun, i);
>> >   if (!loop || !loop->aux)
>> > continue;
>> >   loop_vinfo = (loop_vec_info) loop->aux;
>> >   has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
>> >   delete loop_vinfo;
>> >   if (has_mask_store
>> >   && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
>> > optimize_mask_stores (loop);
>> >   loop->aux = NULL;
>> > }
>> > 
>> 
>> Ah thanks, I'll make the changes.
>
> Note I think that full-blown CSE is a bit overkill just to counter
> a deficient reassoc (or VN).  At least it is supposed to be "cheap"
> and can be conditionalized on loop masks being used as well.

Not sure we should make this conditional on loop masks being used.
It seems either that:

(a) the vectoriser is supposed to avoid creating code that has folding
or VN opportunities, in which case we need to generate the vectorised
code in a smarter way or

(b) the vectoriser is allowed to create code that has folding or VN
opportunities, in which case it would be good to have a defined
place to get rid of them.

I'm just worried that if we make it conditional on loop masks,
we could see cases in which non-loop-mask stuff is optimised
differently based on whether the loop has masks or not.  E.g.
we might get worse code with an unpredicated main loop and
a predicated epilogue compared to a predicated main loop.

Thanks,
Richard
>
>> Thanks,
>> Tamar
>> 
>> > 
>> > > Thanks,
>> > > Tamar
>> > >
>> > > gcc/ChangeLog:
>> > >
>> > >  * tree-vectorizer.c (vectorize_loops): Do local CSE through RPVN
>> > upon
>> > >  successful vectorization.
>> > >
>> > > --- inline copy of patch --
>> > > diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index
>> > >
>> > 4712dc6e7f907637774482a71036a0bd381c2bd2..1e370d60fb19b03c3b6bce45c
>> > 660
>> > > af4b6d32dc51 100644
>> > > --- a/gcc/tree-vectorizer.c
>> > > +++ b/gcc/tree-vectorizer.c
>> > > @@ -81,7 +81,7 @@ along with GCC; see the file COPYING3.  If not see
>> > > #include "gimple-pretty-print.h"
>> > >  #include "opt-problem.h"
>> > >  #include 

Re: [PATCH 1/2] add -Wuse-after-free

2021-11-02 Thread Martin Sebor via Gcc-patches

On 11/1/21 11:32 PM, Eric Gallager wrote:

On Mon, Nov 1, 2021 at 6:18 PM Martin Sebor via Gcc-patches
 wrote:


Patch 1 in the series detects a small subset of uses of pointers
made indeterminate by calls to deallocation functions like free
or C++ operator delete.  To control the conditions the warnings
are issued under the new -Wuse-after-free= option provides three
levels.  At the lowest level the warning triggers only for
unconditional uses of freed pointers and doesn't warn for uses
in equality expressions.  Level 2 also warns for some conditional
uses, and level 3 also for uses in equality expressions.

I debated whether to make level 2 or 3 the default included in
-Wall.  I decided on 3 for two reasons: 1) to raise awareness
of both the problem and GCC's new ability to detect it: using
a pointer after it's been freed, even only in principle, by
a successful call to realloc, is undefined, and 2) because
it's trivial to lower the level either globally, or locally
by suppressing the warning around such misuses.

I've tested the patch on x86_64-linux and by building Glibc
and Binutils/GDB.  It triggers a number of times in each, all
due to comparing invalidated pointers for equality (i.e., level
3).  I have suppressed these in GCC (libiberty) by a #pragma,
and will see how the Glibc folks want to deal with theirs (I
track them in BZ #28521).

The tests contain a number of xfails due to limitations I'm
aware of.  I marked them pr?? until the patch is approved.
I will open bugs for them before committing if I don't resolve
them in a followup.

Martin


Hi, I'm just wondering how this fares compared to the static
analyzer's -Wanalyzer-use-after-free; could you compare and contrast
them for us?


Good question.

The analyzer does a far more exhaustive, interprocedural
analysis of (most) paths through a program, symbolically
evaluating the conditions under which statements are
evaluated to determine reachability.

This initial implementation of -Wuse-after-free does only
a superficial analysis of a few nearby statements in a single
function (plus those inlined into it), those with direct
dependencies of uses on the deallocation statements.  It
doesn't do any evaluation of conditions which limits how
far it can go in its checking.  If it sees a pointer used
after a free call as in

  free (p);
  return *p;   // used after free (level 1)

it triggers.  If it sees a conditional use as in

  free (p);
  if (cond)
return *p;   // may be used after free (level 2)

it triggers at level 2, but only if the free is unconditional
and flows directly into the condition guarding the use.  If
the free is guarded by another condition it doesn't trigger:

  if (c_1)
free (p);

  if (c_2)
return *p;

This last case is the consequence of not doing any condition
evaluation (c_1 could be mutually exclusive with c_2).  Adding
support for it is a future enhancement, something I'm out of
time for in this stage 1 but I'd like to tackle for GCC 13.
Both GCC's and Clang's analyzers detect all three cases.

Unlike GCC's analyzer (but like Clang's), this new warning
flags all pointer uses, not just their derefernces (operands
of equality expressions only at level 3; Clang doesn't seem
to diagnose uses in equality tests at all).  So unlike
GCC's analyzer, it will trigger in the first two example
above even if it's the pointer itself being returned and not
what it pointed to before it was freed.  This is because
a freed pointer is invalid the same way an uninitialized
variable is invalid, and using it in any way is strictly
undefined  (returning it from a function or passing as
an argument to another can lead to memory corruption or
unintentional information disclosure).

Martin

PS The distinction between levels 1 (unconditional uses)
and 2 (conditional) is more like that between the levels
of -Warray-bounds -- level 2 catching slightly more cases
at the expense of potentially some, albeit rare, false
positives.  It's not like between -Wuninitialized and
-Wmaybe-uninitialized where the uninitialized checker
employs limited symbolic predicate analysis and the need
for separate options came out of the desire to be able to
control separately the false positives due to
the imperfections inherent in this strategy (or incidental
to its implementation in GCC: the limited ability to
determine that two conditions are mutually exclusive).


Re: [PATCH 3/5]AArch64 sve: do not keep negated mask and inverse mask live at the same time

2021-11-02 Thread Richard Sandiford via Gcc-patches
Tamar Christina  writes:
> Hi,
>
> Here's a respin of the patch which I think encompasses the method you 
> expected.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>   * tree-vect-stmts.c (vectorizable_condition): Check if inverse of mask
>   is live.
>   * tree-vectorizer.c (scalar_cond_masked_key::get_cond_ops_from_tree):
>   Register mask inverses.
>
> gcc/testsuite/ChangeLog:
>
>   * gcc.target/aarch64/sve/pred-not-gen-1.c: Update testcase.
>   * gcc.target/aarch64/sve/pred-not-gen-2.c: Update testcase.
>   * gcc.target/aarch64/sve/pred-not-gen-3.c: Update testcase.
>   * gcc.target/aarch64/sve/pred-not-gen-4.c: Update testcase.
>
> --- inline copy of patch ---
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c
> index 
> 2c06564186c5a5e7917da475a9c201c81dfeb136..7fac35ea9387818aaa0a12ef66d02313013203af
>  100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-1.c
> @@ -1,4 +1,4 @@
> -/* { dg-do assemble { target aarch64_asm_sve_ok } } */
> +/* { dg-do compile } */
>  /* { dg-options "-O3 --save-temps" } */

Why do we need this?  The --save-temps should ensure that we still
run the scan tests.

Very minor nit, but could you tabify the rest of operator==?  The new
line is correctly formatted but the existing ones aren't, which stands
out in the diff.

The patch is OK without the dg-do changes to the tests, if that works.

Thanks,
Richard

>  
>  /*
> @@ -21,3 +21,4 @@ void f10(double * restrict z, double * restrict w, double * 
> restrict x, double *
>  
>  /* { dg-final { scan-assembler-not {\tbic\t} } } */
>  /* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, 
> p[0-9]+\.b\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-9]+/z, 
> z[0-9]+\.d, #0} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-2.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-2.c
> index 
> 0c3b78d4c67455c971e94fb2ffdd7be2d4884864..d73f7cbeb6be7ad2cc54e601d6c4fbd4d98fa675
>  100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-2.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-2.c
> @@ -1,4 +1,4 @@
> -/* { dg-do assemble { target aarch64_asm_sve_ok } } */
> +/* { dg-do compile } */
>  /* { dg-options "-O3 --save-temps" } */
>  
>  /*
> @@ -21,3 +21,4 @@ void f11(double * restrict z, double * restrict w, double * 
> restrict x, double *
>  
>  /* { dg-final { scan-assembler-not {\tbic\t} } } */
>  /* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, 
> p[0-9]+\.b\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-9]+/z, 
> z[0-9]+\.d, #0.0} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-3.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-3.c
> index 
> 248f8ab57191ce8a1d4c334533de8bc76aa07691..1240d7cb86d00221b546d81f128d64d22d347885
>  100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-3.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-3.c
> @@ -1,4 +1,4 @@
> -/* { dg-do assemble { target aarch64_asm_sve_ok } } */
> +/* { dg-do compile } */
>  /* { dg-options "-O3 --save-temps" } */
>  
>  /*
> @@ -19,3 +19,4 @@ void f12(int * restrict z, int * restrict w, int * restrict 
> x, int * restrict y,
>  
>  /* { dg-final { scan-assembler-not {\tbic\t} } } */
>  /* { dg-final { scan-assembler-not {\tnot\tp[0-9]+\.b, p[0-9]+/z, 
> p[0-9]+\.b\n} } } */
> +/* { dg-final { scan-assembler-times {\tcmple\tp[0-9]+\.s, p[0-9]+/z, 
> z[0-9]+\.s, #0} 1 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c
> index 
> 96200309880a91ad1db5801115c911cfdce06125..edda9c115900ca62268425f1616d975f6a7b7721
>  100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred-not-gen-4.c
> @@ -1,4 +1,4 @@
> -/* { dg-do assemble { target aarch64_asm_sve_ok } } */
> +/* { dg-do compile } */
>  /* { dg-options "-O3 --save-temps" } */
>  
>  #include 
> @@ -12,3 +12,4 @@ void f13(double * restrict z, double * restrict w, double * 
> restrict x, double *
>  
>  /* { dg-final { scan-assembler-not {\tbic\t} } } */
>  /* { dg-final { scan-assembler-times {\tnot\tp[0-9]+\.b, p[0-9]+/z, 
> p[0-9]+\.b\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-9]+/z, 
> z[0-9]+\.d, z[0-9]+\.d} 1 } } */
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index 
> 27ee48aea429810a3d907435a92b8fd1817d..23f7bed26626a872c165cd2654bb4391a847bd7e
>  100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -10375,6 +10375,7 @@ vectorizable_condition (vec_info *vinfo,
> else
>   {
> bool honor_nans = HONOR_NANS (TR

[PATCH v2] libcpp: Implement -Wbidirectional for CVE-2021-42574 [PR103026]

2021-11-02 Thread Marek Polacek via Gcc-patches
On Mon, Nov 01, 2021 at 10:10:40PM +, Joseph Myers wrote:
> On Mon, 1 Nov 2021, Marek Polacek via Gcc-patches wrote:
> 
> > +  /* We've read a bidi char, update the current vector as necessary.  */
> > +  void on_char (kind k, bool ucn_p)
> > +  {
> > +switch (k)
> > +  {
> > +  case kind::LRE:
> > +  case kind::RLE:
> > +  case kind::LRO:
> > +  case kind::RLO:
> > +   vec.push (ucn_p ? 3u : 1u);
> > +   break;
> > +  case kind::LRI:
> > +  case kind::RLI:
> > +  case kind::FSI:
> > +   vec.push (ucn_p ? 2u : 0u);
> > +   break;
> > +  case kind::PDF:
> > +   if (current_ctx () == kind::PDF)
> > + pop ();
> > +   break;
> > +  case kind::PDI:
> > +   if (current_ctx () == kind::PDI)
> > + pop ();
> 
> My understanding is that PDI should pop all intermediate PDF contexts 
> outward to a PDI context, which it also pops.  (But if it's embedded only 
> in PDF contexts, with no PDI context containing it, it doesn't pop 
> anything.)
> 
> I think failing to handle that only means libcpp sometimes models there 
> as being more bidirectional contexts open than there should be, so it 
> might give spurious warnings when in fact all such contexts had been 
> closed by end of string or comment.

Ah, you're right.
https://www.unicode.org/reports/tr9/#Terminating_Explicit_Directional_Isolates
says that "[PDI] terminates the scope of the last LRI, RLI, or FSI whose
scope has not yet been terminated, as well as the scopes of any subsequent
LREs, RLEs, LROs, or RLOs whose scopes have not yet been terminated."
but PDF doesn't have the latter quirk.

Fixed in the below: I added a suitable truncate into on_char.  The new test
Wbidirectional-14.c exercises the handling of PDI.
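
The rule itself can be modelled with a stack of open contexts in which an
isolate entry (LRI/RLI/FSI) absorbs any embedding/override entries
(LRE/RLE/LRO/RLO) opened after it.  A standalone sketch of just that rule
(not the libcpp code, which also tracks whether each context came from a UCN):

  /* 1 = isolate context (closed by PDI),
     0 = embedding/override context (closed by PDF).  */
  static unsigned char bidi_ctx[64];
  static unsigned bidi_depth;

  static void
  on_pdi (void)
  {
    for (unsigned i = bidi_depth; i > 0; --i)
      if (bidi_ctx[i - 1])
        {
          /* Close the nearest isolate together with every embedding/
             override opened after it.  */
          bidi_depth = i - 1;
          return;
        }
    /* No open isolate: the PDI terminates nothing.  */
  }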

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
From a link below:
"An issue was discovered in the Bidirectional Algorithm in the Unicode
Specification through 14.0. It permits the visual reordering of
characters via control sequences, which can be used to craft source code
that renders different logic than the logical ordering of tokens
ingested by compilers and interpreters. Adversaries can leverage this to
encode source code for compilers accepting Unicode such that targeted
vulnerabilities are introduced invisibly to human reviewers."

More info:
https://nvd.nist.gov/vuln/detail/CVE-2021-42574
https://trojansource.codes/

This is not a compiler bug.  However, to mitigate the problem, this patch
implements -Wbidirectional=[none|unpaired|any] to warn about possibly
misleading Unicode bidirectional characters the preprocessor may encounter.

The default is =unpaired, which warns about improperly terminated
bidirectional characters; e.g. a LRE without its appertaining PDF.  The
level =any warns about any use of bidirectional characters.

This patch handles both UCNs and UTF-8 characters.  UCNs designating
bidi characters in identifiers are accepted since r204886.  Then r217144
enabled -fextended-identifiers by default.  Extended characters in C/C++
identifiers have been accepted since r275979.  However, this patch still
warns about mixing UTF-8 and UCN bidi characters; there seems to be no
good reason to allow mixing them.

We warn in different contexts: comments (both C and C++-style), string
literals, character constants, and identifiers.  Expectedly, UCNs are ignored
in comments and raw string literals.  The bidirectional characters can nest
so this patch handles that as well.

I have not included nor tested this at all with Fortran (which also has
string literals and line comments).

Dave M. posted patches improving diagnostics involving Unicode characters.
This patch does not make use of this new infrastructure yet.

PR preprocessor/103026

gcc/c-family/ChangeLog:

* c.opt (Wbidirectional, Wbidirectional=): New option.

gcc/ChangeLog:

* doc/invoke.texi: Document -Wbidirectional.

libcpp/ChangeLog:

* include/cpplib.h (enum cpp_bidirectional_level): New.
(struct cpp_options): Add cpp_warn_bidirectional.
(enum cpp_warning_reason): Add CPP_W_BIDIRECTIONAL.
* init.c (cpp_create_reader): Set cpp_warn_bidirectional.
* lex.c (bidi): New namespace.
(get_bidi_utf8): New function.
(get_bidi_ucn): Likewise.
(maybe_warn_bidi_on_close): Likewise.
(maybe_warn_bidi_on_char): Likewise.
(_cpp_skip_block_comment): Implement warning about bidirectional
characters.
(skip_line_comment): Likewise.
(forms_identifier_p): Likewise.
(lex_identifier): Likewise.
(lex_string): Likewise.
(lex_raw_string): Likewise.

gcc/testsuite/ChangeLog:

* c-c++-common/Wbidirectional-1.c: New test.
* c-c++-common/Wbidirectional-2.c: New test.
* c-c++-common/Wbidirectional-3.c: New test.
* c-c++-common/Wbidirectional-4.c: New test.
* c-c++-common/Wbidirectional-5.c: New test.
* c-c++-common/Wbidirectional-6.c: New 

[PATCH] IBM Z: Free bbs in s390_loop_unroll_adjust

2021-11-02 Thread Stefan Schulze Frielinghaus via Gcc-patches
Bootstrapped and regtested on IBM Z.  Ok for mainline?

gcc/ChangeLog:

* config/s390/s390.c (s390_loop_unroll_adjust): In case of early
exit free bbs.
---
 gcc/config/s390/s390.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index b2f2f6417b3..510e7f58a3b 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -15400,7 +15400,10 @@ s390_loop_unroll_adjust (unsigned nunroll, struct loop 
*loop)
  || (GET_CODE (SET_SRC (set)) == COMPARE
  && GET_MODE (XEXP (SET_SRC (set), 0)) == BLKmode
  && GET_MODE (XEXP (SET_SRC (set), 1)) == BLKmode)))
-   return 1;
+   {
+ free (bbs);
+ return 1;
+   }
 
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
if (MEM_P (*iter))
-- 
2.31.1



[PATCH] tree-optimization/103029 - ensure vect loop versioning constraint on PHIs

2021-11-02 Thread Richard Biener via Gcc-patches
PHI nodes in vectorizer loop versioning need to maintain the same
order of PHI arguments to not disturb SLP discovery.  The following
adds an assertion and mitigation in case loop versioning breaks this
which happens more often after the recent reorg.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2021-11-02  Richard Biener  

PR tree-optimization/103029
* tree-vect-loop-manip.c (vect_loop_versioning): Ensure
the PHI nodes in the loop maintain their original operand
order.
---
 gcc/tree-vect-loop-manip.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index 72d583189c4..f788deb3d72 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -3562,12 +3562,29 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
 "applying loop versioning to outer loop %d\n",
 loop_to_version->num);
 
+  unsigned orig_pe_idx = loop_preheader_edge (loop)->dest_idx;
+
   initialize_original_copy_tables ();
   nloop = loop_version (loop_to_version, cond_expr, &condition_bb,
prob, prob.invert (), prob, prob.invert (), true);
   gcc_assert (nloop);
   nloop = get_loop_copy (loop);
 
+  /* For cycle vectorization with SLP we rely on the PHI arguments
+appearing in the same order as the SLP node operands which for the
+loop PHI nodes means the preheader edge dest index needs to remain
+the same for the analyzed loop which also becomes the vectorized one.
+Make it so in case the state after versioning differs by redirecting
+the first edge into the header to the same destination which moves
+it last.  */
+  if (loop_preheader_edge (loop)->dest_idx != orig_pe_idx)
+   {
+ edge e = EDGE_PRED (loop->header, 0);
+ ssa_redirect_edge (e, e->dest);
+ flush_pending_stmts (e);
+   }
+  gcc_assert (loop_preheader_edge (loop)->dest_idx == orig_pe_idx);
+
   /* Kill off IFN_LOOP_VECTORIZED_CALL in the copy, nobody will
  reap those otherwise;  they also refer to the original
 loops.  */
-- 
2.31.1


Re: [PATCH] [gfortran] Add support for allocate clause (OpenMP 5.0).

2021-11-02 Thread Jakub Jelinek via Gcc-patches
On Tue, Nov 02, 2021 at 05:27:14PM +0100, Jakub Jelinek via Gcc-patches wrote:
> I'm not sure this is what the standard says, certainly C/C++ FE do this
> quite differently for combined/composite constructs.
> In particular, we first split the clauses to the individual leaf constructs
> in c_omp_split_clauses, which for allocate clause is even more complicated
> because as clarified in 5.2:
> "The effect of the allocate clause is as if it is applied to all leaf 
> constructs that permit the clause
> and to which a data-sharing attribute clause that may create a private copy 
> of the same list item is
> applied."
> so there is the has_dup_allocate stuff, we first duplicate it to all leaf
> constructs that allow the allocate clause and set has_dup_allocate if it is
> put on more than one construct, and then if has_dup_allocate is set, do
> more detailed processing.  And finally then {,c_}finish_omp_clauses
> diagnoses what you are trying above, but only on each leaf construct
> separately.
> 
> Now, Fortran is performing the splitting of clauses only much later in
> trans-openmp.c, I wonder if it doesn't have other issues on
> combined/composite constructs if it performs other checks only on the
> clauses on the whole combined/composite construct and not just each leaf
> separately.  I'd say we should move that diagnostics and perhaps other
> similar later on into a separate routine that is invoked only after the
> clauses are split or for non-combined/composite construct clauses.

Testcases unrelated to allocate clause that have same problematic behavior:

void
foo (int x)
{
  #pragma omp parallel for simd shared (x) private (x)
  for (int i = 0; i < 32; i++)
;
}

is correctly accepted: as per
"Clauses on Combined and Composite Constructs",
the shared clause goes to the parallel construct and private goes to the
innermost leaf, aka simd, so there is no leaf construct with multiple
data-sharing clauses for x.

But:

subroutine foo (x)
  integer :: x, i
  !$omp parallel do simd shared (x) private (x)
  do i = 1, 32
  end do
end subroutine

is incorrectly rejected with:
3 |   !$omp parallel do simd shared (x) private (x)
  | 1
Error: Symbol ‘x’ present on multiple clauses at (1)

Jakub



Re: redundant bitmap_bit_p followed by bitmap_clear_bit [was: Re: [COMMITTED] Kill second order relations in the path solver.]

2021-11-02 Thread Richard Sandiford via Gcc-patches
Richard Biener via Gcc-patches  writes:
> On Mon, Nov 1, 2021 at 10:02 PM Bernhard Reutner-Fischer via
> Gcc-patches  wrote:
>>
>> On Mon, 1 Nov 2021 15:21:03 +0100
>> Aldy Hernandez  wrote:
>>
>> > I'm not convinced this makes the code clearer to read, especially if
>> > it's not on a critical path.  But if you feel strongly, please submit
>> > a patch ;-).
>>
>> No i don't feel strongly about it.
>> Compiling e.g. -O2 ira.o
>> # Overhead   Samples  Command  Shared Object  Symbol
>> #     ...  .  .
>> #
>>100.00%  4197  cc1plus  cc1plus[.] mark_reachable_blocks
>>100.00% 22000  cc1plus  cc1plus[.] path_oracle::killing_def
>> and the mark_elimination is reload.
>> So it's not just a handful of calls saved but some. And it would be
>> smaller code as it saves a call. Well maybe another day.
>
> Note that single bit set/clear are already implemented as test && set/clear.
> Note that unfortunately the sbitmap bitmap_set/clear_bit overloads do not
> return the previous state of the bit.

+1 that it would good if the sbitmap versions behaved like the bitmap
versions.  Always low-key bothered me that they didn't do this when I
hit it, but never got round to do anything about it...

Bitmap operations consistently show up high in the profiles, so IMO
using the return value of bitmap_set_bit and bitmap_clear_bit should be
the preferred style.  Not all uses are performance-critical, of course,
but code tends to get copied around.  Having all code do it the fast
way reduces the risk that slow code gets copied to code that needs
to be fast. :-)
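
For the pattern from the subject line, that boils down to (a sketch only,
not the actual hunk):

  if (bitmap_clear_bit (map, i))
    ...the bit was previously set and has now been cleared...

instead of a bitmap_bit_p test followed by an unconditional
bitmap_clear_bit, since bitmap_clear_bit already reports whether the bit
changed.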

> Maybe providing bitmap_test_and_set_bit () and
> bitmap_test_and_clear_bit () would be better (but note we currently
> return true when the bit changed, not when it was set).

Yeah, maybe it's just familiarity, but I find the:

  if (bitmap_set_bit (bb, i))
...something changed...

thing easier to follow than:

  if (!bitmap_test_and_set_bit (bb, i))
...something changed...

Thanks,
Richard


[PATCH] c++: nested lambda capturing a capture proxy [PR94376]

2021-11-02 Thread Patrick Palka via Gcc-patches
Here when determining the type of the FIELD_DECL for the by-value
capture of 'i' in the inner lambda, we incorrectly give it the
type const int instead of int since the effective initializer is
the proxy for the outer capture, and this proxy is const qualified
since the outer lambda is non-mutable.

This patch fixes this by handling capturing of capture proxies specially
in lambda_capture_field_type, namely we instead consider the type of
their FIELD_DECL which unlike the proxy has the true cv-quals of the
captured entity.
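
For reference, the first case in the new test below is a minimal
reproducer (inside a function):

  int i = 0;
  [=] () {            // outer lambda is non-mutable, so its proxy for i is const
    [=] () mutable {
      ++i;            // was rejected: the inner capture wrongly got type const int
    };
  };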

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?

PR c++/94376

gcc/cp/ChangeLog:

* lambda.c (lambda_capture_field_type): Simplify by handling the
is_this case first and specially.  When capturing by-value a
capture proxy, consider the type of the corresponding field
instead.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/lambda/lambda-nested9.C: New test.
---
 gcc/cp/lambda.c   | 19 +++--
 .../g++.dg/cpp0x/lambda/lambda-nested9.C  | 41 +++
 2 files changed, 56 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/lambda/lambda-nested9.C

diff --git a/gcc/cp/lambda.c b/gcc/cp/lambda.c
index 2e9d38bbe83..f16c121dc78 100644
--- a/gcc/cp/lambda.c
+++ b/gcc/cp/lambda.c
@@ -196,7 +196,9 @@ lambda_capture_field_type (tree expr, bool explicit_init_p,
   tree type;
   bool is_this = is_this_parameter (tree_strip_nop_conversions (expr));
 
-  if (!is_this && explicit_init_p)
+  if (is_this)
+type = TREE_TYPE (expr);
+  else if (explicit_init_p)
 {
   tree auto_node = make_auto ();
   
@@ -210,7 +212,7 @@ lambda_capture_field_type (tree expr, bool explicit_init_p,
   else
type = do_auto_deduction (type, expr, auto_node);
 }
-  else if (!is_this && type_dependent_expression_p (expr))
+  else if (type_dependent_expression_p (expr))
 {
   type = cxx_make_type (DECLTYPE_TYPE);
   DECLTYPE_TYPE_EXPR (type) = expr;
@@ -220,10 +222,19 @@ lambda_capture_field_type (tree expr, bool 
explicit_init_p,
 }
   else
 {
+  if (!by_reference_p && is_capture_proxy (expr))
+   {
+ /* When capturing by-value another capture proxy from an enclosing
+lambda, consider the type of the corresponding field instead,
+as the proxy may be const-qualified if the enclosing lambda is
+non-mutable (PR94376).  */
+ gcc_assert (TREE_CODE (DECL_VALUE_EXPR (expr)) == COMPONENT_REF);
+ expr = TREE_OPERAND (DECL_VALUE_EXPR (expr), 1);
+   }
+
   type = non_reference (unlowered_expr_type (expr));
 
-  if (!is_this
- && (by_reference_p || TREE_CODE (type) == FUNCTION_TYPE))
+  if (by_reference_p || TREE_CODE (type) == FUNCTION_TYPE)
type = build_reference_type (type);
 }
 
diff --git a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-nested9.C 
b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-nested9.C
new file mode 100644
index 000..ff7da3b0ce1
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-nested9.C
@@ -0,0 +1,41 @@
+// PR c++/94376
+// { dg-do compile { target c++11 } }
+
+int main() {
+  // We used to incorrectly reject the first two cases.
+  int i = 0;
+  [=] () {
+[=] () mutable {
+  ++i;
+};
+  };
+
+#if __cpp_init_captures
+  [j=0] () {
+[=] () mutable {
+  ++j;
+};
+  };
+#endif
+
+  [=] () {
+[&] () mutable {
+  ++i; // { dg-error "read-only" }
+};
+  };
+
+  const int j = 0;
+  [=] () {
+[=] () mutable {
+  ++j; // { dg-error "read-only" }
+};
+  };
+
+#if __cpp_init_captures
+  [j=0] () {
+[&] () mutable {
+  ++j; // { dg-error "read-only" }
+};
+  };
+#endif
+}
-- 
2.34.0.rc0.19.g0cddd84c9f



Re: [PATCH]middle-end Add an RPO pass after successful vectorization

2021-11-02 Thread Richard Biener via Gcc-patches
On Tue, 2 Nov 2021, Richard Sandiford wrote:

> Richard Biener via Gcc-patches  writes:
> > On Tue, 2 Nov 2021, Tamar Christina wrote:
> >
> >> > -Original Message-
> >> > From: Richard Biener 
> >> > Sent: Tuesday, November 2, 2021 2:24 PM
> >> > To: Tamar Christina 
> >> > Cc: gcc-patches@gcc.gnu.org; nd 
> >> > Subject: Re: [PATCH]middle-end Add an RPO pass after successful
> >> > vectorization
> >> > 
> >> > On Tue, 2 Nov 2021, Tamar Christina wrote:
> >> > 
> >> > > Hi All,
> >> > >
> >> > > Following my current SVE predicate optimization series a problem has
> >> > > presented itself in that the way vector masks are generated for masked
> >> > > operations relies on CSE to share masks efficiently.
> >> > >
> >> > > The issue however is that masking is done using the & operand and & is
> >> > > associative and so reassoc decides to reassociate the masked 
> >> > > operations.
> >> > 
> >> > But it does this for the purpose of canonicalization and thus CSE.
> >> 
> >> Yes, but it turns something like
> >> 
> >> (a & b) & mask into a & (b & mask).
> >> 
> >> When (a & b) is used somewhere else you now lose the CSE.  So it's 
> >> actually hurting in this case.
> >
> > OK, so that's a known "issue" with reassoc, it doesn't consider global
> > CSE opportunities and I guess it pushes 'mask' to leaf if it is loop
> > carried.
> >
> >> > 
> >> > > This makes CSE then unable to CSE an unmasked and a masked operation
> >> > > leading to duplicate operations being performed.
> >> > >
> >> > > To counter this we want to add an RPO pass over the vectorized loop
> >> > > body when vectorization succeeds.  This makes it then no longer
> >> > > reliant on the RTL level CSE.
> >> > >
> >> > > I have not added a testcase for this as it requires the changes in my
> >> > > patch series, however the entire series relies on this patch to work
> >> > > so all the tests there cover it.
> >> > >
> >> > > Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-linux-gnu and
> >> > > no issues.
> >> > >
> >> > > Ok for master?
> >> > 
> >> > You are running VN over _all_ loop bodies rather only those vectorized.
> >> > We loop over vectorized loops earlier for optimizing masked store 
> >> > sequences.
> >> > I suppose you could hook in there.  I'll also notice that we have
> >> > pass_pre_slp_scalar_cleanup which eventually runs plus we have a late 
> >> > FRE.
> >> > So I don't understand why it doesn't work to CSE later.
> >> > 
> >> 
> >> Atm, say you have the conditions a > b, and a > b & a > c
> >> 
> >> We generate
> >> 
> >> mask1 = (a > b) & loop_mask
> >> mask2 = (a > b & a > c) & loop_mask
> >> 
> >> with the intention that mask1 can be re-used in mask2.
> >> 
> >> Reassoc changes this to mask2 = a > b & (a > c & loop_mask)
> >> 
> >> Which has now unmasked (a > b) in mask2, which leaves us unable to combine
> >> the mask1 and mask2.  It doesn't generate incorrect code, just inefficient.
> >> 
> >> >   for (i = 1; i < number_of_loops (cfun); i++)
> >> > {
> >> >   loop_vec_info loop_vinfo;
> >> >   bool has_mask_store;
> >> > 
> >> >   loop = get_loop (cfun, i);
> >> >   if (!loop || !loop->aux)
> >> > continue;
> >> >   loop_vinfo = (loop_vec_info) loop->aux;
> >> >   has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
> >> >   delete loop_vinfo;
> >> >   if (has_mask_store
> >> >   && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
> >> > optimize_mask_stores (loop);
> >> >   loop->aux = NULL;
> >> > }
> >> > 
> >> 
> >> Ah thanks, I'll make the changes.
> >
> > Note I think that full-blown CSE is a bit overkill just to counter
> > a deficient reassoc (or VN).  At least it is supposed to be "cheap"
> > and can be conditionalized on loop masks being used as well.
> 
> Not sure we should make this conditional on loop masks being used.
> It seems either that:
> 
> (a) the vectoriser is supposed to avoid creating code that has folding
> or VN opportunities, in which case we need to generate the vectorised
> code in a smarter way or
> 
> (b) the vectoriser is allowed to create code that has folding or VN
> opportunities, in which case it would be good to have a defined
> place to get rid of them.

It's certainly (b), and the definitive place to get rid of those is
the post-loop optimizer FRE pass.  That just happens to be after
a reassoc pass which makes FRE run into the pre-existing issue
that we fail to capture all (or the best) possible CSE opportunity
from separate associatable chains.

> I'm just worried that if we make it conditional on loop masks,
> we could see cases that in which non-loop-mask stuff is optimised
> differently based on whether the loop has masks or not.  E.g.
> we might get worse code with an unpredicated main loop and
> a predicated epilogue compared to a predicated main loop.

Sure.  Note for loop vectorization we can indeed reasonably easily
CSE the main body and RPO VN should be 

Re: [PATCH 2/2] add -Wdangling-pointer [PR #63272]

2021-11-02 Thread Martin Sebor via Gcc-patches

On 11/2/21 1:40 AM, Eric Gallager wrote:

On Mon, Nov 1, 2021 at 6:20 PM Martin Sebor via Gcc-patches
 wrote:


Patch 2 in this series adds support for detecting the uses of
dangling pointers: those to auto objects that have gone out of
scope.  Like patch 1, to minimize false positives this detection
is very simplistic.  However, thanks to the more deterministic
nature of the problem (all local objects go out of scope) is able
to detect more instances of it.  The approach I used is to simply
search the IL for clobbers that dominate uses of pointers to
the clobbered objects.  If such a use is found that's not
followed by a clobber of the same object the warning triggers.
Similar to -Wuse-after-free, the new -Wdangling-pointer option
has multiple levels: level 1 to detect unconditional uses and
level 2 to flag conditional ones.  Unlike with -Wuse-after-free
there is no use case for testing dangling pointers for
equality, so there is no level 3.
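
To give a flavor of what level 1 is meant to flag (my own minimal
illustration, not one of the new tests):

  int *p;

  void f (void)
  {
    {
      int i = 0;
      p = &i;
    }            /* i goes out of scope (is clobbered) here */
    *p = 1;      /* unconditional use of the dangling pointer p */
  }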

Tested on x86_64-linux and  by building Glibc and Binutils/GDB.
It found no problems outside of the GCC test suite.

As with the first patch in this series, the tests contain a number
of xfails due to known limitations marked with pr??.  I'll
open bugs for them before committing the patch if I don't resolve
them first in a followup.

Martin


So, I'd just like to take this chance to re-state my preference (as a
user) for having separate named options for warnings instead of having
a single option with multiple levels, so that users can toggle just
one but not the other. With the numerical levels, one can detect only
unconditional uses, and not conditional ones, by using level one, but
they can't do it the other way around (i.e. detecting conditional
uses, but not unconditional ones), though. I think having a split like
the one that exists between -Wuninitialized and -Wmaybe-uninitialized
would make sense here.


I agree that separate options are preferable for warnings with
different design strategies.  That said, I can't think of a use
case for enabling the "higher" level of any of our
warnings without also enabling the lower one.  Their sole purpose
is to control the S/R ratio, or perhaps also the cost in terms
of the compile-time resources spent on the analysis, though
I'm not aware of any measurements backing this up.

The distinction between levels 1 and 2 in the proposed warnings
is fairly subtle, and the higher level is by design exceedingly
unlikely to result in enough false positives to make lowering
it worthwhile.  It may not even be worth exposing a difference
between them via an option.   In fact, I think it might make
sense to merge them (perhaps keeping the "may" phrasing) unless
a compelling argument turns up in favor of keeping them distinct.

Martin


Re: [PATCH] PR fortran/91497 -- Silence conversion warnings for MIN1 and MAX1

2021-11-02 Thread Thomas Koenig via Gcc-patches

On 02.11.21 15:22, Manfred Schwarb wrote:

Am 02.11.21 um 14:26 schrieb Thomas Koenig:

Hi Manfred,


In addition to the patches of Steve Kargl for PR 91497:
The MIN1 and MAX1 intrinsics do explicit type conversions and should
be silenced too for -Wconversion and -Wconversion-extra.

Adjust testcase to only use *4 and *8 real types, provide a second
testcase for *10 and *16 precisions.

Two points:

We should modify existing test cases only when necessary, because
modification can impede a regression test.  It is better to create
a new one.



Yes, but this was a quick-and-dirty test of mine, and I realized only afterwards
that Steve had used it as-is. The new testcase is more consistent and more 
complete.
Sandra got errors on targets without REAL(16) support and requested changes,
so I decided to split it.

So you want me to "split" it in 3 parts?
- existing test as is, only for targets with REAL(16) support
- additional tests incl. complex intrinsics for targets with REAL(16) support
- additional tests incl. complex intrinsics for all targets, only single and 
double precision

OTOH, it is perhaps not worth the trouble to do REAL(10) and REAL(16) tests, 
either
it warns or it does not.


Anything that tests both complex and REAL(16) is fine by me.  I don't
think you need to test the combination of COMPLEX(16), both
codepaths have been seen by that time :-)

Or you can split it three ways, like you wrote above.


While we do recognize real*4 and real*8 and so on, these are
non-standard extensions, and I would like to avoid to have these
with new test cases.

Instead of real*8, you can use real(8) or double precision.



Well, double precision is deprecated AFAIK.


Not in Fortran 2018.

Best regards

Thomas


Re: [PATCH v3] AArch64: Improve GOT addressing

2021-11-02 Thread Wilco Dijkstra via Gcc-patches
Hi Richard,

> - Why do we rewrite the constant moves after reload into ldr_got_small_sidi
>   and ldr_got_small_?  Couldn't we just get the move patterns to
>   output the sequence directly?

That's possible too, however it makes the movsi/di patterns more complex.
See version v4 below.

> - I think we should leave out the rtx_costs part and deal with that
>   separately.  This patch should just be about whether we emit two
>   separate define_insns for the move or whether we keep a single one
>   (to support relaxation).

As the title and description explain, code quality improves significantly by
keeping the instructions together before we even consider linker relaxation.
The cost improvements can be done separately but they are important to
get the measured code quality gains.

Here is v4:

Improve GOT addressing by treating the instructions as a pair.  This reduces
register pressure and improves code quality significantly.  SPECINT2017 improves
by 0.6% with -fPIC and codesize is 0.73% smaller.  Perlbench has 0.9% smaller
codesize, 1.5% fewer executed instructions and is 1.8% faster on Neoverse N1.

Passes bootstrap and regress. OK for commit?

ChangeLog:
2021-11-02  Wilco Dijkstra  

* config/aarch64/aarch64.md (movsi): Add alternative for GOT accesses.
(movdi): Likewise.
(ldr_got_small_): Remove pattern.
(ldr_got_small_sidi): Likewise.
* config/aarch64/aarch64.c (aarch64_load_symref_appropriately): Keep
GOT accesses as moves.
(aarch64_print_operand): Correctly print got_lo12 in L specifier.
(aarch64_mov_operand_p): Make GOT accesses valid move operands.
* config/aarch64/constraints.md: Add new constraint Usw for GOT access.

---
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
6d26218a61bd0f9e369bb32388b7f8643b632172..ab2954d11333b3f5a419c55d0a74f13d1df70680
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3753,47 +3753,8 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
   }
 
 case SYMBOL_SMALL_GOT_4G:
-  {
-   /* In ILP32, the mode of dest can be either SImode or DImode,
-  while the got entry is always of SImode size.  The mode of
-  dest depends on how dest is used: if dest is assigned to a
-  pointer (e.g. in the memory), it has SImode; it may have
-  DImode if dest is dereferenced to access the memeory.
-  This is why we have to handle three different ldr_got_small
-  patterns here (two patterns for ILP32).  */
-
-   rtx insn;
-   rtx mem;
-   rtx tmp_reg = dest;
-   machine_mode mode = GET_MODE (dest);
-
-   if (can_create_pseudo_p ())
- tmp_reg = gen_reg_rtx (mode);
-
-   emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
-   if (mode == ptr_mode)
- {
-   if (mode == DImode)
- insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
-   else
- insn = gen_ldr_got_small_si (dest, tmp_reg, imm);
-
-   mem = XVECEXP (SET_SRC (insn), 0, 0);
- }
-   else
- {
-   gcc_assert (mode == Pmode);
-
-   insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
-   mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
- }
-
-   gcc_assert (MEM_P (mem));
-   MEM_READONLY_P (mem) = 1;
-   MEM_NOTRAP_P (mem) = 1;
-   emit_insn (insn);
-   return;
-  }
+  emit_insn (gen_rtx_SET (dest, imm));
+  return;
 
 case SYMBOL_SMALL_TLSGD:
   {
@@ -11159,7 +11120,7 @@ aarch64_print_operand (FILE *f, rtx x, int code)
   switch (aarch64_classify_symbolic_expression (x))
{
case SYMBOL_SMALL_GOT_4G:
- asm_fprintf (asm_out_file, ":lo12:");
+ asm_fprintf (asm_out_file, ":got_lo12:");
  break;
 
case SYMBOL_SMALL_TLSGD:
@@ -20241,6 +20202,11 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
   return aarch64_simd_valid_immediate (x, NULL);
 }
 
+  /* GOT accesses are valid moves.  */
+  if (SYMBOL_REF_P (x)
+  && aarch64_classify_symbolic_expression (x) == SYMBOL_SMALL_GOT_4G)
+return true;
+
   x = strip_salt (x);
   if (SYMBOL_REF_P (x) && mode == DImode && CONSTANT_ADDRESS_P (x))
 return true;
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 
7085cd4a51dc4c22def9b95f2221b3847603a9e5..9d38269e9429597b825838faf9f241216cc6ab47
 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1263,8 +1263,8 @@ (define_expand "mov"
 )
 
 (define_insn_and_split "*movsi_aarch64"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m,  
r,  r, w,r,w, w")
-   (match_operand:SI 1 "aarch64_mov_operand"  " 
r,r,k,M,n,Usv,m,m,rZ,w,Usa,Ush,rZ,w,w,Ds"))]
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m,  
r,  r,  r, w,r,w, w")
+   (match_operand:SI 1 "aarch64_mov_operand"  " 
r,r,k,M,n,Usv,m,m,

Re: [PATCH] restore ancient -Waddress for weak symbols [PR33925]

2021-11-02 Thread Martin Sebor via Gcc-patches

On 10/4/21 4:39 PM, Eric Gallager wrote:

On Mon, Oct 4, 2021 at 2:43 PM Martin Sebor via Gcc-patches
 wrote:


While resolving the recent -Waddress enhancement request (PR
PR102103) I came across a 2007 problem report about GCC 4 having
stopped warning for using the address of inline functions in
equality comparisons with null.  With inline functions being
commonplace in C++ this seems like an important use case for
the warning.

The change that resulted in suppressing the warning in these
cases was introduced inadvertently in a fix for PR 22252.

To restore the warning, the attached patch enhances
the decl_with_nonnull_addr_p() function to return true also for
weak symbols for which a definition has been provided.

Tested on x86_64-linux and by comparing the GCC output for new
test cases to Clang's which diagnoses all but one instance of
these cases with either -Wtautological-pointer-compare or
-Wpointer-bool-conversion, depending on context.


Would it make sense to use the same names as clang's flags here, too,
instead of dumping them all under -Waddress? I think the additional
granularity could be helpful for people who only want some warnings,
but not others.


In general I agree.  In this case I'm not sure.  The options
that control these warnings in neither compiler make perfect
sense to me.  Here's a breakdown of the cases:

               Clang                    GCC
array == array -Wtautological-compare   -Warray-compare
&decl == null  -Wtautological-pointer-compare   -Waddress
&decl1 == &decl2   N/A  N/A

GCC has recently diverged from Clang by introducing the new
-Warray-compare option, and we don't have
-Wtautological-pointer-compare.  So while I think it makes
sense to use the same names for new features as those they
are controlled by in Clang, the argument to do the same for
simple enhancements to existing features is quite a bit less
compelling.  We'd likely end up diagnosing different subsets
of the same problem under different options.

Martin




The one case where Clang doesn't issue a warning but GCC
with the patch does is for a symbol explicitly declared with
attribute weak for which a definition has been provided.
I believe the address of such symbols is necessarily nonnull and
so issuing the warning is helpful
(both GCC and Clang fold such comparisons to a constant).
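
For instance (an illustrative snippet of my own, not taken from the new
tests):

  __attribute__ ((weak)) void f () { }   // weak, but a definition is provided

  bool b = &f == nullptr;                // now diagnosed by -Waddress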

Martin




Re: [PATCH v3] AArch64: Improve GOT addressing

2021-11-02 Thread Richard Sandiford via Gcc-patches
Wilco Dijkstra  writes:
> Hi Richard,
>
>> - Why do we rewrite the constant moves after reload into ldr_got_small_sidi
>>   and ldr_got_small_?  Couldn't we just get the move patterns to
>>   output the sequence directly?
>
> That's possible too, however it makes the movsi/di patterns more complex.

Yeah, it certainly does that, but it also makes the other code
significantly simpler. :-)

> See version v4 below.

Thanks, this looks good apart from a couple of nits:

> […]
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 
> 7085cd4a51dc4c22def9b95f2221b3847603a9e5..9d38269e9429597b825838faf9f241216cc6ab47
>  100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1263,8 +1263,8 @@ (define_expand "mov"
>  )
>
>  (define_insn_and_split "*movsi_aarch64"
> -  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m, 
>  r,  r, w,r,w, w")
> -   (match_operand:SI 1 "aarch64_mov_operand"  " 
> r,r,k,M,n,Usv,m,m,rZ,w,Usa,Ush,rZ,w,w,Ds"))]
> +  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m, 
>  r,  r,  r, w,r,w, w")
> +   (match_operand:SI 1 "aarch64_mov_operand"  " 
> r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds"))]
>"(register_operand (operands[0], SImode)
>  || aarch64_reg_or_zero (operands[1], SImode))"
>"@
> @@ -1278,6 +1278,7 @@ (define_insn_and_split "*movsi_aarch64"
> ldr\\t%s0, %1
> str\\t%w1, %0
> str\\t%s1, %0
> +   * return \"adrp\\t%x0, %A1\;ldr\\t%w0, [%x0, %L1]\";

The * return stuff shouldn't be necessary here.  E.g. the SVE MOVPRFX
alternatives directly use \; in @ alternatives.

> adr\\t%x0, %c1
> adrp\\t%x0, %A1
> fmov\\t%s0, %w1
> @@ -1293,13 +1294,15 @@ (define_insn_and_split "*movsi_aarch64"
>  }"
>;; The "mov_imm" type for CNT is just a placeholder.
>[(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4,
> -   
> load_4,store_4,store_4,adr,adr,f_mcr,f_mrc,fmov,neon_move")
> -   (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,fp,fp,fp,simd")]
> +   
> load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move")
> +   (set_attr "arch"   "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
> +   (set_attr "length" "4,4,4,4,*,  4,4, 4,4, 4,8,4,4, 4, 4, 4,   4")
> +]
>  )
>
>  (define_insn_and_split "*movdi_aarch64"
> -  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r, r,w, 
> m,m,  r,  r, w,r,w, w")
> -   (match_operand:DI 1 "aarch64_mov_operand"  " 
> r,r,k,N,M,n,Usv,m,m,rZ,w,Usa,Ush,rZ,w,w,Dd"))]
> +  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r, r,w, 
> m,m,   r,  r,  r, w,r,w, w")
> +   (match_operand:DI 1 "aarch64_mov_operand"  " 
> r,r,k,N,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
>"(register_operand (operands[0], DImode)
>  || aarch64_reg_or_zero (operands[1], DImode))"
>"@
> @@ -1314,13 +1317,14 @@ (define_insn_and_split "*movdi_aarch64"
> ldr\\t%d0, %1
> str\\t%x1, %0
> str\\t%d1, %0
> +   * return TARGET_ILP32 ? \"adrp\\t%0, %A1\;ldr\\t%w0, [%0, %L1]\" : 
> \"adrp\\t%0, %A1\;ldr\\t%0, [%0, %L1]\";
> adr\\t%x0, %c1
> adrp\\t%x0, %A1
> fmov\\t%d0, %x1
> fmov\\t%x0, %d1
> fmov\\t%d0, %d1
> * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);"
> -   "(CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), 
> DImode))
> +   "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), 
> DImode)
>  && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
> [(const_int 0)]
> "{
> @@ -1329,9 +1333,10 @@ (define_insn_and_split "*movdi_aarch64"
>  }"
>;; The "mov_imm" type for CNTD is just a placeholder.
>[(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,mov_imm,
> -load_8,load_8,store_8,store_8,adr,adr,f_mcr,f_mrc,fmov,
> -neon_move")
> -   (set_attr "arch" "*,*,*,*,*,*,sve,*,fp,*,fp,*,*,fp,fp,fp,simd")]
> +load_8,load_8,store_8,store_8,load_8,adr,adr,f_mcr,f_mrc,
> +fmov,neon_move")
> +   (set_attr "arch"   "*,*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
> +   (set_attr "length" "4,4,4,4,4,*,  4,4, 4,4, 4,8,4,4, 4, 4, 4,   4")]
>  )
>
>  (define_insn "insv_imm"
> @@ -6707,29 +6712,6 @@ (define_insn "add_losym_"
>[(set_attr "type" "alu_imm")]
>  )
>
> -(define_insn "ldr_got_small_"
> -  [(set (match_operand:PTR 0 "register_operand" "=r")
> -   (unspec:PTR [(mem:PTR (lo_sum:PTR
> - (match_operand:PTR 1 "register_operand" "r")
> - (match_operand:PTR 2 "aarch64_valid_symref" 
> "S")))]
> -   UNSPEC_GOTSMALLPIC))]
> -  ""
> -  "ldr\\t%0, [%1, #:got_lo12:%c2]"
> -  [(set_attr "type" "load_")]
> -)
> -
> -(define_insn "ldr_got_small_sidi"
> -  [(set (match_operand:DI 0 "register_operand" "=r")
> -   (zero_extend:DI
> -(unspec:SI [(mem:S

Re: [PATCH v2] libcpp: Implement -Wbidirectional for CVE-2021-42574 [PR103026]

2021-11-02 Thread Martin Sebor via Gcc-patches

On 11/2/21 11:18 AM, Marek Polacek via Gcc-patches wrote:

On Mon, Nov 01, 2021 at 10:10:40PM +, Joseph Myers wrote:

On Mon, 1 Nov 2021, Marek Polacek via Gcc-patches wrote:


+  /* We've read a bidi char, update the current vector as necessary.  */
+  void on_char (kind k, bool ucn_p)
+  {
+switch (k)
+  {
+  case kind::LRE:
+  case kind::RLE:
+  case kind::LRO:
+  case kind::RLO:
+   vec.push (ucn_p ? 3u : 1u);
+   break;
+  case kind::LRI:
+  case kind::RLI:
+  case kind::FSI:
+   vec.push (ucn_p ? 2u : 0u);
+   break;
+  case kind::PDF:
+   if (current_ctx () == kind::PDF)
+ pop ();
+   break;
+  case kind::PDI:
+   if (current_ctx () == kind::PDI)
+ pop ();


My understanding is that PDI should pop all intermediate PDF contexts
outward to a PDI context, which it also pops.  (But if it's embedded only
in PDF contexts, with no PDI context containing it, it doesn't pop
anything.)

I think failing to handle that only means libcpp sometimes models there
as being more bidirectional contexts open than there should be, so it
might give spurious warnings when in fact all such contexts had been
closed by end of string or comment.


Ah, you're right.
https://www.unicode.org/reports/tr9/#Terminating_Explicit_Directional_Isolates
says that "[PDI] terminates the scope of the last LRI, RLI, or FSI whose
scope has not yet been terminated, as well as the scopes of any subsequent
LREs, RLEs, LROs, or RLOs whose scopes have not yet been terminated."
but PDF doesn't have the latter quirk.

Fixed in the below: I added a suitable truncate into on_char.  The new test
Wbidirectional-14.c exercises the handling of PDI.
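
Roughly like this (a sketch of the idea, not the exact hunk; it assumes the
context vector exposes count/truncate alongside push/pop, and it uses the
0u/2u encoding for isolate contexts from on_char quoted above):

  case kind::PDI:
    /* Pop up to and including the innermost isolate context, if any;
       embeddings/overrides opened after it go away with it.  */
    for (unsigned i = vec.count (); i-- > 0; )
      if (vec[i] == 0u || vec[i] == 2u)
        {
          vec.truncate (i);
          break;
        }
    break;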

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
 From a link below:
"An issue was discovered in the Bidirectional Algorithm in the Unicode
Specification through 14.0. It permits the visual reordering of
characters via control sequences, which can be used to craft source code
that renders different logic than the logical ordering of tokens
ingested by compilers and interpreters. Adversaries can leverage this to
encode source code for compilers accepting Unicode such that targeted
vulnerabilities are introduced invisibly to human reviewers."

More info:
https://nvd.nist.gov/vuln/detail/CVE-2021-42574
https://trojansource.codes/

This is not a compiler bug.  However, to mitigate the problem, this patch
implements -Wbidirectional=[none|unpaired|any] to warn about possibly
misleading Unicode bidirectional characters the preprocessor may encounter.


Bidirectional sounds very general.  Can we come up with a name
that's a bit more descriptive of the problem the warning reports?
From skimming the docs and the tests it looks like the warning
points out uses of bidirectional characters in the program source
code as well as comments.  Would -Wbidirectional-text be better?
Or -Wbidirectional-chars?  (If Clang is also adding a warning
for this, syncing up with them one way or the other might be
helpful.)

...

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index c5730228821..9dfb95dc24c 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -327,7 +327,9 @@ Objective-C and Objective-C++ Dialects}.
  -Warith-conversion @gol
  -Warray-bounds  -Warray-bounds=@var{n}  -Warray-compare @gol
  -Wno-attributes  -Wattribute-alias=@var{n} -Wno-attribute-alias @gol
--Wno-attribute-warning  -Wbool-compare  -Wbool-operation @gol
+-Wno-attribute-warning  @gol
+-Wbidirectional=@r{[}none@r{|}unpaired@r{|}any@r{]} @gol
+-Wbool-compare  -Wbool-operation @gol
  -Wno-builtin-declaration-mismatch @gol
  -Wno-builtin-macro-redefined  -Wc90-c99-compat  -Wc99-c11-compat @gol
  -Wc11-c2x-compat @gol
@@ -7674,6 +7676,21 @@ Attributes considered include @code{alloc_align}, 
@code{alloc_size},
  This is the default.  You can disable these warnings with either
  @option{-Wno-attribute-alias} or @option{-Wattribute-alias=0}.
  
+@item -Wbidirectional=@r{[}none@r{|}unpaired@r{|}any@r{]}

+@opindex Wbidirectional=
+@opindex Wbidirectional
+@opindex Wno-bidirectional
+Warn about UTF-8 bidirectional characters.


I suggest to mention where.  If everywhere, enumerate the most
common contexts to make it clear it means everywhere:

  Warn about UTF-8 bidirectional characters in source code,
  including string literals, identifiers, and comments.

Martin


Re: [PATCH] RISC-V: Enable overlap-by-pieces in case of fast unaliged access

2021-11-02 Thread Vineet Gupta

On 7/22/21 6:29 AM, Kito Cheng via Gcc-patches wrote:

Could you add a testcase? Otherwise LGTM.

Option: -O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64
void foo(char *dst){
__builtin_memset(dst, 0, 15);
}

On Thu, Jul 22, 2021 at 8:53 PM Christoph Muellner via Gcc-patches
 wrote:


This patch enables the overlap-by-pieces feature of the by-pieces
infrastructure for inlining builtins in case the target has set
riscv_slow_unaligned_access_p to false.

To demonstrate the effect for targets with fast unaligned access,
the following code sequences are generated for a 15-byte memset-zero.

Without overlap_op_by_pieces we get:
   8e:   00053023sd  zero,0(a0)
   92:   00052423sw  zero,8(a0)
   96:   00051623sh  zero,12(a0)
   9a:   00050723sb  zero,14(a0)


To generate even the non optimized code above with gcc 11 [1][2], what 
do I need to do. Despite -mno-strict-align and trying -mtune={rocket, 
sifive-7-series}, I only get the fully unrolled version


foo:
# memcpy-15.c:2:__builtin_memset(dst, 0, 15);
sb  zero,0(a0)  #, MEM  [(void *)dst_2(D)]
sb  zero,1(a0)  #, MEM  [(void *)dst_2(D)]
sb  zero,2(a0)  #, MEM  [(void *)dst_2(D)]
sb  zero,3(a0)  #, MEM  [(void *)dst_2(D)]
sb  zero,4(a0)  #, MEM  [(void *)dst_2(D)]
sb  zero,5(a0)  #, MEM  [(void *)dst_2(D)]
sb  zero,6(a0)  #, MEM  [(void *)dst_2(D)]
sb  zero,7(a0)  #, MEM  [(void *)dst_2(D)]
sb  zero,8(a0)  #, MEM  [(void *)dst_2(D)]
sb  zero,9(a0)  #, MEM  [(void *)dst_2(D)]
sb  zero,10(a0) #, MEM  [(void *)dst_2(D)]
sb  zero,11(a0) #, MEM  [(void *)dst_2(D)]
sb  zero,12(a0) #, MEM  [(void *)dst_2(D)]
sb  zero,13(a0) #, MEM  [(void *)dst_2(D)]
sb  zero,14(a0) #, MEM  [(void *)dst_2(D)]
ret 
.size   foo, .-foo
.ident  "GCC: (GNU) 11.1.0"

[1] https://gcc.gnu.org/pipermail/gcc-patches/2021-October/581858.html
[2] https://github.com/kito-cheng/riscv-gcc/tree/riscv-gcc-11.1.0-zbabcs

Thx,
-Vineet



With overlap_op_by_pieces we get:
   7e:   00053023sd  zero,0(a0)
   82:   000533a3sd  zero,7(a0)

gcc/ChangeLog:

 * config/riscv/riscv.c (riscv_overlap_op_by_pieces): New function.
 (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
 riscv_overlap_op_by_pieces.

Signed-off-by: Christoph Muellner 
---
  gcc/config/riscv/riscv.c | 11 +++
  1 file changed, 11 insertions(+)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 576960bb37c..98c76ba657a 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -5201,6 +5201,14 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
return riscv_slow_unaligned_access_p;
  }

+/* Implement TARGET_OVERLAP_OP_BY_PIECES_P.  */
+
+static bool
+riscv_overlap_op_by_pieces (void)
+{
+  return !riscv_slow_unaligned_access_p;
+}
+
  /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */

  static bool
@@ -5525,6 +5533,9 @@ riscv_asan_shadow_offset (void)
  #undef TARGET_SLOW_UNALIGNED_ACCESS
  #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access

+#undef TARGET_OVERLAP_OP_BY_PIECES_P
+#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
+
  #undef TARGET_SECONDARY_MEMORY_NEEDED
  #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed

--
2.31.1







Re: [PATCH] restore ancient -Waddress for weak symbols [PR33925]

2021-11-02 Thread Marek Polacek via Gcc-patches
On Tue, Nov 02, 2021 at 12:51:16PM -0600, Martin Sebor via Gcc-patches wrote:
> On 10/4/21 4:39 PM, Eric Gallager wrote:
> > On Mon, Oct 4, 2021 at 2:43 PM Martin Sebor via Gcc-patches
> >  wrote:
> > > 
> > > While resolving the recent -Waddress enhancement request (PR
> > > PR102103) I came across a 2007 problem report about GCC 4 having
> > > stopped warning for using the address of inline functions in
> > > equality comparisons with null.  With inline functions being
> > > commonplace in C++ this seems like an important use case for
> > > the warning.
> > > 
> > > The change that resulted in suppressing the warning in these
> > > cases was introduced inadvertently in a fix for PR 22252.
> > > 
> > > To restore the warning, the attached patch enhances
> > > the decl_with_nonnull_addr_p() function to return true also for
> > > weak symbols for which a definition has been provided.
> > > 
> > > Tested on x86_64-linux and by comparing the GCC output for new
> > > test cases to Clang's which diagnoses all but one instance of
> > > these cases with either -Wtautological-pointer-compare or
> > > -Wpointer-bool-conversion, depending on context.
> > 
> > Would it make sense to use the same names as clang's flags here, too,
> > instead of dumping them all under -Waddress? I think the additional
> > granularity could be helpful for people who only want some warnings,
> > but not others.
> 
> In general I agree.  In this case I'm not sure.  The options
> that control these warnings in neither compiler make perfect
> sense to me.  Here's a breakdown of the cases:
> 
>                Clang                    GCC
> array == array -Wtautological-compare   -Warray-compare
> &decl == null  -Wtautological-pointer-compare   -Waddress
> &decl1 == &decl2   N/A  N/A
> 
> GCC has recently diverged from Clang by introducing the new
> -Warray-compare option, and we don't have

That's not exactly true: -Warray-compare is not meant to be
-Wtautological-compare.  I introduced -Warray-compare because
the C++ standard now says that array == array is deprecated,
but -Wtautological-compare comes from a different angle: it
warns because the comparison always evaluates to a constant.

> -Wtautological-pointer-compare.  So while I think it makes
> sense to use the same names for new features as those they
> are controlled by in Clang, the argument to do the same for
> simple enhancements to existing features is quite a bit less
> compelling.  We'd likely end up diagnosing different subsets
> of the same problem under different options.

Yeah, in this particular case I think it'd be better to simply enhance
-Waddress rather than to invent a new option.

Marek



Re: [PATCH] Initial implementation of -Whomoglyph [PR preprocessor/103027]

2021-11-02 Thread Martin Sebor via Gcc-patches

On 11/1/21 3:14 PM, David Malcolm via Gcc-patches wrote:

[Resending to get around mailing list size limit; see notes below]

This patch implements a new -Whomoglyph diagnostic, enabled by default.

Internally it implements the "skeleton" algorithm from:
   http://www.unicode.org/reports/tr39/#Confusable_Detection
so that every new identifier is mapped to a "skeleton", and if
the skeleton is already in use by a different identifier, issue
a -Whomoglyph diagnostic.
It uses the data from:
   https://www.unicode.org/Public/security/13.0.0/confusables.txt
to determine which characters are confusable.
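
In standalone terms the check amounts to roughly the following (an
illustrative model only, nothing GCC-specific; compute_skeleton stands for
the UTS #39 skeleton transform driven by confusables.txt, warn_homoglyph
for the diagnostic):

  #include <string>
  #include <unordered_map>

  std::string compute_skeleton (const std::string &id);                   // assumed
  void warn_homoglyph (const std::string &id, const std::string &first);  // assumed

  // Maps each skeleton to the first identifier seen with it.
  std::unordered_map<std::string, std::string> first_use_of_skeleton;

  void check_identifier (const std::string &id)
  {
    std::string skel = compute_skeleton (id);
    auto it = first_use_of_skeleton.find (skel);
    if (it == first_use_of_skeleton.end ())
      first_use_of_skeleton.emplace (skel, id);
    else if (it->second != id)
      warn_homoglyph (id, it->second);   // confusable with an earlier, non-equal id
  }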

For example, given the example of CVE-2021-42694 at
https://trojansource.codes/, with this patch we emit:

t.cc:7:1: warning: identifier ‘sayНello’ (‘say\u041dello’)... [CWE-1007] [-Whomoglyph]
 7 | void say<U+041D>ello() {
   | ^~~~
t.cc:3:1: note: ...confusable with non-equal identifier ‘sayHello’ here
 3 | void sayHello() {
   | ^~~~

(the precise location of the token isn't quite right; the
identifiers should be underlined, rather than the "void" tokens)

This takes advantage of:
   "diagnostics: escape non-ASCII source bytes for certain diagnostics"
 https://gcc.gnu.org/pipermail/gcc-patches/2021-November/583020.html
to escape non-ASCII characters when printing a source line for -Whomoglyph,
so that we print "say<U+041D>ello" when quoting the source line, making it
clearer that this is not "sayHello".

In order to implement "skeleton", I had to implement NFD support, so the
patch also contains some UTF-32 support code.

Known issues:
- I'm doing an extra hash_table lookup on every identifier lookup.
   I haven't yet measured the impact on the speed of the compiler.
   If this is an issue, is there a good place to stash an extra
   pointer in every identifier?
- doesn't yet bootstrap, as the confusables.txt data contains ASCII
   to ASCII confusables, leading to warnings such as:
../../.././gcc/options.h:11273:3: warning: identifier ‘OPT_l’... [CWE-1007] [-Whomoglyph]
../../.././gcc/options.h:9959:3: note: ...confusable with non-equal identifier ‘OPT_I’ (‘OPT_I’) here
   Perhaps the option should have levels, where we don't complain about
   pure ASCII confusables at the default level?
- no docs yet
- as noted above the location_t of the token isn't quite right
- map_identifier_to_skeleton and map_skeleton_to_first_use aren't
   yet integrated with the garbage collector
- some other FIXMEs in the patch

[I had to trim the patch for space to get it to pass the size filter on the
mailing list; I trimmed:
   contrib/unicode/confusables.txt,
   gcc/testsuite/selftests/NormalizationTest.txt
which can be downloaded from the URLs in the ChangeLog, and:
   gcc/confusables.inc
   gcc/decomposition.inc
which can be generated using the scripts in the patch ]

Thoughts?


None from me on the actual feature -- even after our discussion
this morning I remain comfortably ignorant of the problem :)
I just have a quick comment on the two new string classes:

...

+
+/* A class for manipulating UTF-32 strings.  */
+
+class utf32_string
+{

...

+ private:

...

+  cppchar_t *m_buf;
+  size_t m_alloc_len;
+  size_t m_len;
+};
+
+/* A class for constructing UTF-8 encoded strings.
+   These are not NUL-terminated.  */
+
+class utf8_string
+{

...

+ private:
+  uchar  *m_buf;
+  size_t m_alloc_sz;
+  size_t m_len;
+};


There are container abstractions both in C++ and in GCC that
these classes look like they could be implemented in terms of:
I'm thinking of std::string, std::vector, vec, and auto_vec.
They have the additional advantage of being safely copyable
and assignable, and of course, of having already been tested.
I see that the classes in your patch provide additional
functionality that the abstractions don't.  I'd expect it
to be doable on top of the abstractions and without
reimplementing all the basic buffer management.
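
Roughly along these lines, for instance (just a sketch of the direction,
not a drop-in replacement; the member names are mine):

  class utf32_string
  {
   public:
    void append (cppchar_t c) { m_chars.safe_push (c); }
    size_t length () const { return m_chars.length (); }
    const cppchar_t *chars () const { return m_chars.address (); }
    /* ...the NFD/skeleton helpers would sit on top of this...  */

   private:
    auto_vec<cppchar_t> m_chars;
  };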

Martin


Re: [PATCH v2] libcpp: Implement -Wbidirectional for CVE-2021-42574 [PR103026]

2021-11-02 Thread Marek Polacek via Gcc-patches
On Tue, Nov 02, 2021 at 01:20:03PM -0600, Martin Sebor wrote:
> On 11/2/21 11:18 AM, Marek Polacek via Gcc-patches wrote:
> > On Mon, Nov 01, 2021 at 10:10:40PM +, Joseph Myers wrote:
> > > On Mon, 1 Nov 2021, Marek Polacek via Gcc-patches wrote:
> > > 
> > > > +  /* We've read a bidi char, update the current vector as necessary.  
> > > > */
> > > > +  void on_char (kind k, bool ucn_p)
> > > > +  {
> > > > +switch (k)
> > > > +  {
> > > > +  case kind::LRE:
> > > > +  case kind::RLE:
> > > > +  case kind::LRO:
> > > > +  case kind::RLO:
> > > > +   vec.push (ucn_p ? 3u : 1u);
> > > > +   break;
> > > > +  case kind::LRI:
> > > > +  case kind::RLI:
> > > > +  case kind::FSI:
> > > > +   vec.push (ucn_p ? 2u : 0u);
> > > > +   break;
> > > > +  case kind::PDF:
> > > > +   if (current_ctx () == kind::PDF)
> > > > + pop ();
> > > > +   break;
> > > > +  case kind::PDI:
> > > > +   if (current_ctx () == kind::PDI)
> > > > + pop ();
> > > 
> > > My understanding is that PDI should pop all intermediate PDF contexts
> > > outward to a PDI context, which it also pops.  (But if it's embedded only
> > > in PDF contexts, with no PDI context containing it, it doesn't pop
> > > anything.)
> > > 
> > > I think failing to handle that only means libcpp sometimes models there
> > > as being more bidirectional contexts open than there should be, so it
> > > might give spurious warnings when in fact all such contexts had been
> > > closed by end of string or comment.
> > 
> > Ah, you're right.
> > https://www.unicode.org/reports/tr9/#Terminating_Explicit_Directional_Isolates
> > says that "[PDI] terminates the scope of the last LRI, RLI, or FSI whose
> > scope has not yet been terminated, as well as the scopes of any subsequent
> > LREs, RLEs, LROs, or RLOs whose scopes have not yet been terminated."
> > but PDF doesn't have the latter quirk.
> > 
> > Fixed in the below: I added a suitable truncate into on_char.  The new test
> > Wbidirectional-14.c exercises the handling of PDI.
> > 
> > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > 
> > -- >8 --
> >  From a link below:
> > "An issue was discovered in the Bidirectional Algorithm in the Unicode
> > Specification through 14.0. It permits the visual reordering of
> > characters via control sequences, which can be used to craft source code
> > that renders different logic than the logical ordering of tokens
> > ingested by compilers and interpreters. Adversaries can leverage this to
> > encode source code for compilers accepting Unicode such that targeted
> > vulnerabilities are introduced invisibly to human reviewers."
> > 
> > More info:
> > https://nvd.nist.gov/vuln/detail/CVE-2021-42574
> > https://trojansource.codes/
> > 
> > This is not a compiler bug.  However, to mitigate the problem, this patch
> > implements -Wbidirectional=[none|unpaired|any] to warn about possibly
> > misleading Unicode bidirectional characters the preprocessor may encounter.
> 
> Bidirectional sounds very general.  Can we come up with a name
> that's a bit more descriptive of the problem the warning reports?
> From skimming the docs and the tests it looks like the warning
> points out uses of bidirectional characters in the program source
> code as well as comments.  Would -Wbidirectional-text be better?
> Or -Wbidirectional-chars?  (If Clang is also adding a warning
> for this, syncing up with them one way or the other might be
> helpful.)

I dunno, I could go with -Wbidirectional-chars.  Does anyone else
think I should rename the current name to -Wbidirectional-chars?

Other ideas: -Wunicode-bidi / -Wmultibyte-chars / -Wmisleading-bidirectional.

The patch for clang-tidy I saw called this misleading-bidirectional.

> ...
> > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> > index c5730228821..9dfb95dc24c 100644
> > --- a/gcc/doc/invoke.texi
> > +++ b/gcc/doc/invoke.texi
> > @@ -327,7 +327,9 @@ Objective-C and Objective-C++ Dialects}.
> >   -Warith-conversion @gol
> >   -Warray-bounds  -Warray-bounds=@var{n}  -Warray-compare @gol
> >   -Wno-attributes  -Wattribute-alias=@var{n} -Wno-attribute-alias @gol
> > --Wno-attribute-warning  -Wbool-compare  -Wbool-operation @gol
> > +-Wno-attribute-warning  @gol
> > +-Wbidirectional=@r{[}none@r{|}unpaired@r{|}any@r{]} @gol
> > +-Wbool-compare  -Wbool-operation @gol
> >   -Wno-builtin-declaration-mismatch @gol
> >   -Wno-builtin-macro-redefined  -Wc90-c99-compat  -Wc99-c11-compat @gol
> >   -Wc11-c2x-compat @gol
> > @@ -7674,6 +7676,21 @@ Attributes considered include @code{alloc_align}, 
> > @code{alloc_size},
> >   This is the default.  You can disable these warnings with either
> >   @option{-Wno-attribute-alias} or @option{-Wattribute-alias=0}.
> > +@item -Wbidirectional=@r{[}none@r{|}unpaired@r{|}any@r{]}
> > +@opindex Wbidirectional=
> > +@opindex Wbidirectional
> > +@opindex Wno-bidirectional
> > +Warn about UTF-8 bidirectional characte

Re: [PATCH] RISC-V: Enable overlap-by-pieces in case of fast unaliged access

2021-11-02 Thread Christoph Müllner via Gcc-patches
On Tue, Nov 2, 2021 at 8:27 PM Vineet Gupta  wrote:
>
> On 7/22/21 6:29 AM, Kito Cheng via Gcc-patches wrote:
> > Could you add a testcase? Otherwise LGTM.
> >
> > Option: -O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64
> > void foo(char *dst){
> > __builtin_memset(dst, 0, 15);
> > }
> >
> > On Thu, Jul 22, 2021 at 8:53 PM Christoph Muellner via Gcc-patches
> >  wrote:
> >>
> >> This patch enables the overlap-by-pieces feature of the by-pieces
> >> infrastructure for inlining builtins in case the target has set
> >> riscv_slow_unaligned_access_p to false.
> >>
> >> To demonstrate the effect for targets with fast unaligned access,
> >> the following code sequences are generated for a 15-byte memset-zero.
> >>
> >> Without overlap_op_by_pieces we get:
> >>8e:   00053023sd  zero,0(a0)
> >>92:   00052423sw  zero,8(a0)
> >>96:   00051623sh  zero,12(a0)
> >>9a:   00050723sb  zero,14(a0)
>
> To generate even the non optimized code above with gcc 11 [1][2], what
> do I need to do. Despite -mno-strict-align and trying -mtune={rocket,
> sifive-7-series}, I only get the fully unrolled version

You need a tuning struct with slow_unaligned_access == false.
Both Rocket and Sifive 7 have slow unaligned access set to true.
On mainline you have thead-c906, which would work.

BR
Christoph

>
> foo:
> # memcpy-15.c:2:__builtin_memset(dst, 0, 15);
> sb  zero,0(a0)  #, MEM  [(void *)dst_2(D)]
> sb  zero,1(a0)  #, MEM  [(void *)dst_2(D)]
> sb  zero,2(a0)  #, MEM  [(void *)dst_2(D)]
> sb  zero,3(a0)  #, MEM  [(void *)dst_2(D)]
> sb  zero,4(a0)  #, MEM  [(void *)dst_2(D)]
> sb  zero,5(a0)  #, MEM  [(void *)dst_2(D)]
> sb  zero,6(a0)  #, MEM  [(void *)dst_2(D)]
> sb  zero,7(a0)  #, MEM  [(void *)dst_2(D)]
> sb  zero,8(a0)  #, MEM  [(void *)dst_2(D)]
> sb  zero,9(a0)  #, MEM  [(void *)dst_2(D)]
> sb  zero,10(a0) #, MEM  [(void *)dst_2(D)]
> sb  zero,11(a0) #, MEM  [(void *)dst_2(D)]
> sb  zero,12(a0) #, MEM  [(void *)dst_2(D)]
> sb  zero,13(a0) #, MEM  [(void *)dst_2(D)]
> sb  zero,14(a0) #, MEM  [(void *)dst_2(D)]
> ret
> .size   foo, .-foo
> .ident  "GCC: (GNU) 11.1.0"
>
> [1] https://gcc.gnu.org/pipermail/gcc-patches/2021-October/581858.html
> [2] https://github.com/kito-cheng/riscv-gcc/tree/riscv-gcc-11.1.0-zbabcs
>
> Thx,
> -Vineet
>
> >>
> >> With overlap_op_by_pieces we get:
> >>7e:   00053023sd  zero,0(a0)
> >>82:   000533a3sd  zero,7(a0)
> >>
> >> gcc/ChangeLog:
> >>
> >>  * config/riscv/riscv.c (riscv_overlap_op_by_pieces): New function.
> >>  (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
> >>  riscv_overlap_op_by_pieces.
> >>
> >> Signed-off-by: Christoph Muellner 
> >> ---
> >>   gcc/config/riscv/riscv.c | 11 +++
> >>   1 file changed, 11 insertions(+)
> >>
> >> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> >> index 576960bb37c..98c76ba657a 100644
> >> --- a/gcc/config/riscv/riscv.c
> >> +++ b/gcc/config/riscv/riscv.c
> >> @@ -5201,6 +5201,14 @@ riscv_slow_unaligned_access (machine_mode, unsigned 
> >> int)
> >> return riscv_slow_unaligned_access_p;
> >>   }
> >>
> >> +/* Implement TARGET_OVERLAP_OP_BY_PIECES_P.  */
> >> +
> >> +static bool
> >> +riscv_overlap_op_by_pieces (void)
> >> +{
> >> +  return !riscv_slow_unaligned_access_p;
> >> +}
> >> +
> >>   /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
> >>
> >>   static bool
> >> @@ -5525,6 +5533,9 @@ riscv_asan_shadow_offset (void)
> >>   #undef TARGET_SLOW_UNALIGNED_ACCESS
> >>   #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
> >>
> >> +#undef TARGET_OVERLAP_OP_BY_PIECES_P
> >> +#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
> >> +
> >>   #undef TARGET_SECONDARY_MEMORY_NEEDED
> >>   #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
> >>
> >> --
> >> 2.31.1
> >>
> >


Re: Building GNU Arm Embedded Toolchain for macOS/arm64

2021-11-02 Thread Romain Goyet via Gcc-patches
Hi Iain,

Thanks for your reply! However, what I'm referring to is a lot simpler
than that: I just want to build bare-metal binaries for Cortex-M devices
from a Darwin/arm64 host. Not to produce Mach-O binaries that can run on
Darwin/arm64.

Essentially, I just made the toolchain at
https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-rm
run on macOS/arm64. It still only produces Cortex-M ELF binaries.

I'm looking for the right person at Arm to help them ship a pre-built arm64
image on the page above. I'm really trying to solve a very practical
problem. My patches are super simple:
- Fix their build-prerequisite.sh and build-toolchain.sh scripts
- Fix GMP so it uses the proper macOS syntax on Arm
- Fix libelf so it picks up stdlib.h
- Fix a gcc-hook

Thanks!

 - Romain


On Tue, Nov 2, 2021 at 11:35 AM Iain Sandoe  wrote:

> Hi Romain
>
> > On 2 Nov 2021, at 13:09, Romain Goyet via Gcc-patches <
> gcc-patches@gcc.gnu.org> wrote:
>
> > Arm distribute pre-built versions of GCC that targets bare-metal Cortex-M
> > devices at
> >
> https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-rm
>
> > I have written a few small patches to get this toolchain to build and run
> > on macOS/arm64. Should I submit them somewhere?
>
> The work to implement Arm64 on macOS is on-going (prototype is here:
> https://github.com/iains/gcc-darwin-arm64)
>
> The patches for host-side support are mostly ready (probably your patches
> will be
> quite similar to those on the prototype, but if there’s something extra
> we’ve missed so far
> please feel free to post them to this list - please cc me).
>
> I am hoping to get the host-side stuff into master first (and reasonably
> soon).
>
> thanks
> Iain
>
> main host-side support:
>
>
> https://github.com/iains/gcc-darwin-arm64/commit/af097efc24a72c005756b05f65f0f450e41340ed
> + next four patches (deals with the fact that Arm64-darwin cannot use PCH).
>
> driver:
>
> https://github.com/iains/gcc-darwin-arm64/commit/5757cced1e1c8d4f0ec5458b9af7154d694e400b
>
> ada host-side tools.
>
> https://github.com/iains/gcc-darwin-arm64/commit/c16becf974da73646eb7b5e356323ffa4a585b21
>
>


Re: [PATCH] RISC-V: Enable overlap-by-pieces in case of fast unaliged access

2021-11-02 Thread Vineet Gupta




On 11/2/21 1:09 PM, Christoph Müllner wrote:

Without overlap_op_by_pieces we get:
8e:   00053023sd  zero,0(a0)
92:   00052423sw  zero,8(a0)
96:   00051623sh  zero,12(a0)
9a:   00050723sb  zero,14(a0)

To generate even the non optimized code above with gcc 11 [1][2], what
do I need to do. Despite -mno-strict-align and trying -mtune={rocket,
sifive-7-series}, I only get the fully unrolled version

You need a tuning struct with slow_unaligned_access == false.
Both Rocket and Sifive 7 have slow unaligned access set to true.
On mainline you have thead-c906, which would work.


But doesn't -mno-strict-align imply that?

Thx,
-Vineet



Re: [PATCH] vect: Add bias parameter for partial vectorization

2021-11-02 Thread Robin Dapp via Gcc-patches
Hi,

thanks for the helpful comments. The attached v2 addresses the following
points from them:

 - Save the bias in loop_vinfo and set it once in vect_verify_loop_lens.
 - Add code to handle the bias in vect_set_loop_controls_directly.
 - Adjust costing.
 - Add comments for the new len_load parameter as well as change wording.
 - Include the rs6000 change directly.

I'm not entirely satisfied with the bias code for the loop controls,
mainly because of side effects I might not have considered.  The test
suites show no new regressions and bootstrap succeeded, though.

Regards
 Robin

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index bf033e31c1c..dc2756f83e9 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5637,7 +5637,8 @@
 (define_expand "len_load_v16qi"
   [(match_operand:V16QI 0 "vlogical_operand")
(match_operand:V16QI 1 "memory_operand")
-   (match_operand:QI 2 "gpc_reg_operand")]
+   (match_operand:QI 2 "gpc_reg_operand")
+   (match_operand:QI 3 "zero_constant")]
   "TARGET_P9_VECTOR && TARGET_64BIT"
 {
   rtx mem = XEXP (operands[1], 0);
@@ -5651,6 +5652,7 @@
   [(match_operand:V16QI 0 "memory_operand")
(match_operand:V16QI 1 "vlogical_operand")
(match_operand:QI 2 "gpc_reg_operand")
+   (match_operand:QI 3 "zero_constant")
   ]
   "TARGET_P9_VECTOR && TARGET_64BIT"
 {
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 2b41cb7fb7b..265c76f1609 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5213,7 +5213,10 @@ which must be a vector mode.  Operand 2 has whichever integer mode the
 target prefers.  If operand 2 exceeds the number of elements in mode
 @var{m}, the behavior is undefined.  If the target prefers the length
 to be measured in bytes rather than elements, it should only implement
-this pattern for vectors of @code{QI} elements.
+this pattern for vectors of @code{QI} elements.  Operand 3 specifies
+a bias predicate that determines whether a length of zero is permitted
+or not.  If permitted, the predicate should only allow a zero immediate,
+otherwise it should only allow an immediate value of -1.
 
 This pattern is not allowed to @code{FAIL}.
 
@@ -5226,7 +5229,10 @@ a vector mode.  Operand 2 has whichever integer mode the target prefers.
 If operand 2 exceeds the number of elements in mode @var{m}, the behavior
 is undefined.  If the target prefers the length to be measured in bytes
 rather than elements, it should only implement this pattern for vectors
-of @code{QI} elements.
+of @code{QI} elements.  Operand 3 specifies a bias predicate that
+determines whether a length of zero is permitted or not.  If permitted,
+the predicate should only allow a zero constant, otherwise it should
+only allow an immediate value of -1.
 
 This pattern is not allowed to @code{FAIL}.
 
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 8312d08aab2..90fdc440248 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -2696,9 +2696,9 @@ expand_call_mem_ref (tree type, gcall *stmt, int index)
 static void
 expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
 {
-  class expand_operand ops[3];
-  tree type, lhs, rhs, maskt;
-  rtx mem, target, mask;
+  class expand_operand ops[4];
+  tree type, lhs, rhs, maskt, biast;
+  rtx mem, target, mask, bias;
   insn_code icode;
 
   maskt = gimple_call_arg (stmt, 2);
@@ -2723,11 +2723,20 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
   create_output_operand (&ops[0], target, TYPE_MODE (type));
   create_fixed_operand (&ops[1], mem);
   if (optab == len_load_optab)
-create_convert_operand_from (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)),
- TYPE_UNSIGNED (TREE_TYPE (maskt)));
+{
+  create_convert_operand_from (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)),
+   TYPE_UNSIGNED (TREE_TYPE (maskt)));
+  biast = gimple_call_arg (stmt, 3);
+  bias = expand_normal (biast);
+  create_input_operand (&ops[3], bias, QImode);
+  expand_insn (icode, 4, ops);
+}
   else
+{
 create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
-  expand_insn (icode, 3, ops);
+expand_insn (icode, 3, ops);
+}
+
   if (!rtx_equal_p (target, ops[0].value))
 emit_move_insn (target, ops[0].value);
 }
@@ -2741,9 +2750,9 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
 static void
 expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
 {
-  class expand_operand ops[3];
-  tree type, lhs, rhs, maskt;
-  rtx mem, reg, mask;
+  class expand_operand ops[4];
+  tree type, lhs, rhs, maskt, biast;
+  rtx mem, reg, mask, bias;
   insn_code icode;
 
   maskt = gimple_call_arg (stmt, 2);
@@ -2766,11 +2775,19 @@ expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
   create_fixed_operand (&ops[0], mem);
   create_input_operand (&ops[1], reg, TYPE_MODE (type));
   if (optab == len_store_optab)
-create_convert_operand_from (&ops[2], mask, 
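
Editorial sketch, not part of the patch above: to make the bias convention
concrete, the arithmetic as I read the md.texi hunk is that the value handed
to the len_load/len_store insn is the computed length plus the advertised
bias, so a target whose length operand cannot encode zero (for example one
that encodes a highest byte index rather than a byte count) advertises -1.
The helper name below is made up for illustration.

#include <stdio.h>

/* Hypothetical helper: what the insn's length operand would carry for a
   given computed length and target-advertised bias (0 or -1).  */
static long
insn_length_operand (long computed_len, int bias)
{
  return computed_len + bias;
}

int
main (void)
{
  printf ("bias  0: %ld\n", insn_length_operand (16, 0));   /* prints 16 */
  printf ("bias -1: %ld\n", insn_length_operand (16, -1));  /* prints 15 */
  return 0;
}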

Re: Building GNU Arm Embedded Toolchain for macOS/arm64

2021-11-02 Thread Iain Sandoe
Hi Romain,

> On 2 Nov 2021, at 20:12, Romain Goyet  wrote:
> 

> Thanks for your reply! However, what I'm referring to is a lot more simple 
> than that: I just want to build bare-metal binaries for Cortex-M devices from 
> a Darwin/arm64 host. Not to produce Mach-O binaries that can run on 
> Darwin/arm64.

> Essentially, I just made the toolchain at 
> https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-rm
>  run on macOS/arm64. It still only produces Cortex-M ELF binaries.

Right, the host-side support is a small set of patches from the larger project 
to make ‘native' aarch64 support.

by host-side I just mean enough to be able to build a cross-compiler (e.g. 
using clang) that runs on aarch64-darwin and produces output for some other 
target - I believe that is your intent as you describe here.

> I'm looking for the right person at Arm to help them ship a pre-built arm64 
> image on the page above. I'm really trying to solve a very practical problem. 
> My patches are super simple:
> - Fix their build-prerequisite.sh and build-toolchain.sh scripts
> - Fix GMP so it uses the proper macOS syntax on Arm
> - Fix libelf so it picks up stdlib.h

^^^ these are patches that would need to be sent to the appropriate “upstream” 
for the project in question - we don’t keep local patches to GMP or libelf in 
the GCC tree.

> - Fix a gcc-hook

OK, then please post that patch here, and we’ll take a look.

FWIW, I would expect for a general solution you’d need to deal (somehow) with 
the PCH issue ( I guess for embedded you might not care about Ada ;) )

Iain



Re: [PATCH 0/5] Fortran manual updates

2021-11-02 Thread Damian Rouson
On Tue, Nov 2, 2021 at 8:56 AM Sandra Loosemore 
wrote:

>
> ... I will hold off on that if it's going to cause problems.
>
Thanks for taking on this task, Sandra.  I'm not aware of the technical
issues around mark-up formatting and the transition that's happening, but I
hope nothing holds up a long-needed update to the standards conformance
information.  Those are really important references that I've seen people
rely upon in decision-making that impacts projects.

Damian


Re: [PATCH] attribs: Allow optional second arg for attr deprecated [PR102049]

2021-11-02 Thread Marek Polacek via Gcc-patches
On Mon, Nov 01, 2021 at 05:15:03PM -0600, Martin Sebor wrote:
> On 10/11/21 9:17 AM, Marek Polacek via Gcc-patches wrote:
> > Any thoughts?
> 
> I'm a little unsure.  Clang just uses the replacement string
> as the text of the fix-it note as is, so it does nothing to
> help programmers make sure the replacement is in sync with
> what it's supposed to replace.  E.g., for this Clang output
> is below:
> 
> __attribute__ ((deprecated ("foo is bad", "use bar instead")))
> void foo (void);
> void baz (void) { foo (); }
> 
> int bar;
> 
> :2:19: warning: 'foo' is deprecated: foo is bad
> [-Wdeprecated-declarations]
> void baz (void) { foo (); }
>   ^~~
>   use bar instead
> 
> Since bar is a variable it's hard to see how it might be used
> instead of the function foo().  Fix-its, as I understand them,
> are meant not just as a visual clue but also to let IDEs and
> other tools automatically apply the fixes.  With buggy fix-its
> this obviously wouldn't work.
> 
> I think the replacement would be useful if it had to reference
> an existing symbol of the same kind, and if the compiler helped
> enforce it.  Otherwise it seems like a recipe for bit rot and
> for things/tools not working well together.

OK, I think I'm not going to pursue this further.  I'll close the
PR and reference my patch in it, should this come up again in the
future.

Thanks,

> > 
> > On Thu, Sep 23, 2021 at 12:16:36PM -0400, Marek Polacek via Gcc-patches 
> > wrote:
> > > Clang implements something we don't have:
> > > 
> > > __attribute__((deprecated("message", "replacement")));
> > > 
> > > which seems pretty neat so I wrote this patch to add it to gcc.
> > > 
> > > It doesn't allow the optional second argument in the standard [[]]
> > > form so as not to clash with possible future standard additions.
> > > 
> > > I had hoped we could print a nice fix-it replacement hint, but that
> > > won't be possible until warn_deprecated_use gets something better than
> > > input_location.
> > > 
> > > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > > 
> > >   PR c++/102049
> > > 
> > > gcc/c-family/ChangeLog:
> > > 
> > >   * c-attribs.c (c_common_attribute_table): Increase max_len for
> > >   deprecated.
> > >   (handle_deprecated_attribute): Allow an optional second argument
> > >   in the GNU form of attribute deprecated.
> > > 
> > > gcc/c/ChangeLog:
> > > 
> > >   * c-parser.c (c_parser_std_attribute): Give a diagnostic when
> > >   the standard form of an attribute deprecated has a second argument.
> > > 
> > > gcc/ChangeLog:
> > > 
> > >   * doc/extend.texi: Document attribute deprecated with an
> > >   optional second argument.
> > >   * tree.c (warn_deprecated_use): Print the replacement argument,
> > >   if any.
> > > 
> > > gcc/testsuite/ChangeLog:
> > > 
> > >   * gcc.dg/c2x-attr-deprecated-3.c: Adjust dg-error.
> > >   * c-c++-common/Wdeprecated-arg-1.c: New test.
> > > ---
> > >   gcc/c-family/c-attribs.c  | 17 -
> > >   gcc/c/c-parser.c  |  8 ++
> > >   gcc/doc/extend.texi   | 24 ++
> > >   .../c-c++-common/Wdeprecated-arg-1.c  | 21 
> > >   gcc/testsuite/gcc.dg/c2x-attr-deprecated-3.c  |  2 +-
> > >   gcc/tree.c| 25 +++
> > >   6 files changed, 90 insertions(+), 7 deletions(-)
> > >   create mode 100644 gcc/testsuite/c-c++-common/Wdeprecated-arg-1.c
> > > 
> > > diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c
> > > index 007b928c54b..ef857a9ae2c 100644
> > > --- a/gcc/c-family/c-attribs.c
> > > +++ b/gcc/c-family/c-attribs.c
> > > @@ -409,7 +409,7 @@ const struct attribute_spec 
> > > c_common_attribute_table[] =
> > >to prevent its usage in source code.  */
> > > { "no vops",0, 0, true,  false, false, false,
> > > handle_novops_attribute, NULL },
> > > -  { "deprecated", 0, 1, false, false, false, false,
> > > +  { "deprecated", 0, 2, false, false, false, false,
> > > handle_deprecated_attribute, NULL },
> > > { "unavailable",0, 1, false, false, false, false,
> > > handle_unavailable_attribute, NULL },
> > > @@ -4107,6 +4107,21 @@ handle_deprecated_attribute (tree *node, tree name,
> > > error ("deprecated message is not a string");
> > > *no_add_attrs = true;
> > >   }
> > > +  else if (TREE_CHAIN (args) != NULL_TREE)
> > > +{
> > > +  /* We allow an optional second argument in the GNU form of
> > > +  attribute deprecated, which specifies the replacement.  */
> > > +  if (flags & ATTR_FLAG_CXX11)
> > > + {
> > > +   error ("replacement argument only allowed in GNU attributes");
> > > +   *no_add_attrs = true;
> > > + }
> > > +  else if (TREE_CODE (TREE_VALUE (TREE_CHAIN (args))) != STRING_CST)
>
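
Editorial sketch, not part of the thread above: the two-argument GNU form
below is the syntax this (unmerged) patch proposed; released GCC only
accepts the single-message form, so the proposed variant is kept behind a
made-up macro here.

/* Proposed (never merged) two-argument form vs. the accepted one-argument
   form; PROPOSED_TWO_ARG_DEPRECATED is a placeholder, not a real macro.  */
#ifdef PROPOSED_TWO_ARG_DEPRECATED
__attribute__ ((deprecated ("foo is obsolete", "bar")))
void foo (void);
#else
__attribute__ ((deprecated ("foo is obsolete; use bar instead")))
void foo (void);
#endif

void bar (void);

void
caller (void)
{
  foo ();   /* warning: 'foo' is deprecated: foo is obsolete... */
}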

[PATCH 0/2] Re: [PATCH] libcpp: Implement -Wbidirectional for CVE-2021-42574 [PR103026]

2021-11-02 Thread David Malcolm via Gcc-patches
On Mon, 2021-11-01 at 12:36 -0400, Marek Polacek via Gcc-patches wrote:
> From a link below:
> "An issue was discovered in the Bidirectional Algorithm in the
> Unicode
> Specification through 14.0. It permits the visual reordering of
> characters via control sequences, which can be used to craft source
> code
> that renders different logic than the logical ordering of tokens
> ingested by compilers and interpreters. Adversaries can leverage this
> to
> encode source code for compilers accepting Unicode such that targeted
> vulnerabilities are introduced invisibly to human reviewers."
> 
> More info:
> https://nvd.nist.gov/vuln/detail/CVE-2021-42574
> https://trojansource.codes/
> 
> This is not a compiler bug.  However, to mitigate the problem, this
> patch
> implements -Wbidirectional=[none|unpaired|any] to warn about possibly
> misleading Unicode bidirectional characters the preprocessor may
> encounter.
> 
> The default is =unpaired, which warns about improperly terminated
> bidirectional characters; e.g. a LRE without its appertaining PDF. 
> The
> level =any warns about any use of bidirectional characters.
> 
> This patch handles both UCNs and UTF-8 characters.  UCNs designating
> bidi characters in identifiers are accepted since r204886.  Then
> r217144
> enabled -fextended-identifiers by default.  Extended characters in
> C/C++
> identifiers have been accepted since r275979.  However, this patch
> still
> warns about mixing UTF-8 and UCN bidi characters; there seems to be
> no
> good reason to allow mixing them.
> 
> We warn in different contexts: comments (both C and C++-style),
> string
> literals, character constants, and identifiers.  Expectedly, UCNs are
> ignored
> in comments and raw string literals.  The bidirectional characters
> can nest
> so this patch handles that as well.
> 
> I have not included nor tested this at all with Fortran (which also
> has
> string literals and line comments).
> 
> Dave M. posted patches improving diagnostics involving Unicode
> characters.
> This patch does not make use of this new infrastructure yet.

Challenge accepted :)

Here are a couple of patches on top of the v1 version of your patch
to make use of that new infrastructure.

The first patch is relatively non-invasive; the second patch reworks
things quite a bit to capture location_t values for the bidirectional
control characters, and use them in the diagnostics, with labelled
ranges, giving e.g.:

$ ./xgcc -B. -S ../../src/gcc/testsuite/c-c++-common/Wbidirectional-2.c 
-fdiagnostics-escape-format=bytes
../../src/gcc/testsuite/c-c++-common/Wbidirectional-2.c: In function ‘main’:
../../src/gcc/testsuite/c-c++-common/Wbidirectional-2.c:5:28: warning: unpaired 
UTF-8 bidirectional character detected [-Wbidirectional=]
5 | /* Say hello; newline<81>/*/ return 0 ;
  |    ^
  |  | |
  |  | end of bidirectional context
  |  U+2067 (RIGHT-TO-LEFT ISOLATE)

There's a more complicated example in the test case.

Not yet bootstrapped, but hopefully gives you some ideas on future
versions of the patch.

Note that the precise location_t values aren't going to make much sense
without the escaping feature [1], and I don't think that's backportable
to GCC 11, so these UX tweaks might be for GCC 12+ only.

Hope this is constructive
Dave

[1] what is a "column number" in a line of bidirectional text?  Right now
it's a 1-based offset w.r.t. the logical ordering of the characters, but
respecting tabs and counting certain characters as occupying two columns,
but it's not at all clear to me that there's such a thing as a
"column number" in bidirectional text.


David Malcolm (2):
  Flag CPP_W_BIDIRECTIONAL so that source lines are escaped
  Capture locations of bidi chars and underline ranges

 .../c-c++-common/Wbidirectional-ranges.c  |  54 
 libcpp/lex.c  | 254 ++
 2 files changed, 261 insertions(+), 47 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-ranges.c

-- 
2.26.3
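
Editorial sketch, not part of the patch series above: a minimal input of
the kind the new warning is meant to catch, written with a UCN so the
bidirectional control stays visible in plain ASCII.  The option spelling
follows this series and the exact diagnostic text is assumed, not quoted.

/* An unpaired U+202E (RIGHT-TO-LEFT OVERRIDE) inside a string literal.
   With the patch applied, compiling with something like
       gcc -Wbidirectional=unpaired -c bidi-sketch.c
   should warn that the override is never closed by U+202C
   (POP DIRECTIONAL FORMATTING).  */
const char *msg = "access granted \u202E/* admins only */";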



[PATCH 1/2] Flag CPP_W_BIDIRECTIONAL so that source lines are escaped

2021-11-02 Thread David Malcolm via Gcc-patches
Before:

  Wbidirectional-1.c: In function ‘main’:
  Wbidirectional-1.c:6:43: warning: unpaired UTF-8 bidirectional character 
detected [-Wbidirectional=]
  6 | /*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
|   ^
  Wbidirectional-1.c:9:28: warning: unpaired UTF-8 bidirectional character 
detected [-Wbidirectional=]
  9 | /* end admins only ‮ { ⁦*/
|^

  Wbidirectional-11.c:6:15: warning: UTF-8 vs UCN mismatch when closing a 
context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidirectional=]
  6 | int LRE_‪_PDF_\u202c;
|   ^

After setting rich_loc.set_escape_on_output (true):

  Wbidirectional-1.c:6:43: warning: unpaired UTF-8 bidirectional character 
detected [-Wbidirectional=]
  6 | /* } if (isAdmin)  begin admins 
only */
|   
^
  Wbidirectional-1.c:9:28: warning: unpaired UTF-8 bidirectional character 
detected [-Wbidirectional=]
  9 | /* end admins only  { */
|^

  Wbidirectional-11.c:6:15: warning: UTF-8 vs UCN mismatch when closing a 
context by "U+202C (POP DIRECTIONAL FORMATTING)" [-Wbidirectional=]
  6 | int LRE__PDF_\u202c;
|   ^

libcpp/ChangeLog:
* lex.c (maybe_warn_bidi_on_close): Use a rich_location
and call set_escape_on_output (true) on it.
(maybe_warn_bidi_on_char): Likewise.

Signed-off-by: David Malcolm 
---
 libcpp/lex.c | 29 +
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/libcpp/lex.c b/libcpp/lex.c
index f7a86fbe4b5..88aba307991 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -1387,9 +1387,11 @@ maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar 
*p)
   const location_t loc
= linemap_position_for_column (pfile->line_table,
   CPP_BUF_COLUMN (pfile->buffer, p));
-  cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
-"unpaired UTF-8 bidirectional character "
-"detected");
+  rich_location rich_loc (pfile->line_table, loc);
+  rich_loc.set_escape_on_output (true);
+  cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
+ "unpaired UTF-8 bidirectional character "
+ "detected");
 }
   /* We're done with this context.  */
   bidi::on_close ();
@@ -1414,6 +1416,9 @@ maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar 
*p, bidi::kind kind,
   const location_t loc
= linemap_position_for_column (pfile->line_table,
   CPP_BUF_COLUMN (pfile->buffer, p));
+  rich_location rich_loc (pfile->line_table, loc);
+  rich_loc.set_escape_on_output (true);
+
   /* It seems excessive to warn about a PDI/PDF that is closing
 an opened context because we've already warned about the
 opening character.  Except warn when we have a UCN x UTF-8
@@ -1422,20 +1427,20 @@ maybe_warn_bidi_on_char (cpp_reader *pfile, const uchar 
*p, bidi::kind kind,
{
  if (warn_bidi == bidirectional_unpaired
  && bidi::current_ctx_ucn_p () != ucn_p)
-   cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
-  "UTF-8 vs UCN mismatch when closing "
-  "a context by \"%s\"", bidi::to_str (kind));
+   cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
+   "UTF-8 vs UCN mismatch when closing "
+   "a context by \"%s\"", bidi::to_str (kind));
}
   else if (warn_bidi == bidirectional_any)
{
  if (kind == bidi::kind::PDF || kind == bidi::kind::PDI)
-   cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
-  "\"%s\" is closing an unopened context",
-  bidi::to_str (kind));
+   cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
+   "\"%s\" is closing an unopened context",
+   bidi::to_str (kind));
  else
-   cpp_warning_with_line (pfile, CPP_W_BIDIRECTIONAL, loc, 0,
-  "found problematic Unicode character \"%s\"",
-  bidi::to_str (kind));
+   cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
+   "found problematic Unicode character \"%s\"",
+   bidi::to_str (kind));
}
 }
   /* We're done with this context.  */
-- 
2.26.3



[PATCH 2/2] Capture locations of bidi chars and underline ranges

2021-11-02 Thread David Malcolm via Gcc-patches
This patch converts the bidi::vec to use a struct so that we can
capture location_t values for the bidirectional control characters,
and uses these to label sources ranges in the diagnostics.

The effect on the output can be seen in the new testcase.

gcc/testsuite/ChangeLog:
* c-c++-common/Wbidirectional-ranges.c: New test.

libcpp/ChangeLog:
* lex.c (struct bidi::context): New.
(bidi::vec): Convert to a vec of context rather than unsigned char.
(bidi::current_ctx): Update for above change.
(bidi::current_ctx_ucn_p): Likewise.
(bidi::current_ctx_loc): New.
(bidi::on_char): Update for usage of context struct.  Add "loc"
param and pass it when pushing contexts.
(get_location_for_byte_range_in_cur_line): New.
(get_bidi_utf8): Rename to...
(get_bidi_utf8_1): ...this, reintroducing...
(get_bidi_utf8): ...as a wrapper, setting *OUT when the result is
not NONE.
(get_bidi_ucn): Rename to...
(get_bidi_ucn_1): ...this, reintroducing...
(get_bidi_ucn): ...as a wrapper, setting *OUT when the result is
not NONE.
(class unpaired_bidi_rich_location): New.
(maybe_warn_bidi_on_close): Use unpaired_bidi_rich_location when
reporting on unpaired bidi chars.  Split into singular vs plural
spellings.
(maybe_warn_bidi_on_char): Pass in a location_t rather than a
const uchar * and use it when emitting warnings, and when calling
bidi::on_char.
(_cpp_skip_block_comment): Capture location when kind is not NONE
and pass it to maybe_warn_bidi_on_char.
(skip_line_comment): Likewise.
(forms_identifier_p): Likewise.
(lex_raw_string): Likewise.
(lex_string): Likewise.

Signed-off-by: David Malcolm 
---
 .../c-c++-common/Wbidirectional-ranges.c  |  54 
 libcpp/lex.c  | 241 ++
 2 files changed, 252 insertions(+), 43 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/Wbidirectional-ranges.c

diff --git a/gcc/testsuite/c-c++-common/Wbidirectional-ranges.c 
b/gcc/testsuite/c-c++-common/Wbidirectional-ranges.c
new file mode 100644
index 000..a41ae47dc30
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/Wbidirectional-ranges.c
@@ -0,0 +1,54 @@
+/* PR preprocessor/103026 */
+/* { dg-do compile } */
+/* { dg-options "-Wbidirectional=unpaired -fdiagnostics-show-caret" } */
+/* Verify that we escape and underline pertinent bidirectional characters
+   when quoting the source.  */
+
+int test_unpaired_bidi () {
+int isAdmin = 0;
+/*‮ } ⁦if (isAdmin)⁩ ⁦ begin admins only */
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
+#if 0
+   { dg-begin-multiline-output "" }
+ /* } if (isAdmin)  begin admins only */
+   ^
+   |   |   |
+   |   |   end 
of bidirectional context
+   U+202E (RIGHT-TO-LEFT OVERRIDE) U+2066 (LEFT-TO-RIGHT ISOLATE)
+   { dg-end-multiline-output "" }
+#endif
+
+__builtin_printf("You are an admin.\n");
+/* end admins only ‮ { ⁦*/
+/* { dg-warning "bidirectional" "" { target *-*-* } .-1 } */
+#if 0
+   { dg-begin-multiline-output "" }
+ /* end admins only  { */
+    ^
+|  ||
+|  |end of bidirectional context
+|  U+2066 (LEFT-TO-RIGHT ISOLATE)
+U+202E (RIGHT-TO-LEFT OVERRIDE)
+   { dg-end-multiline-output "" }
+#endif
+
+return 0;
+}
+
+int LRE_‪_PDF_\u202c;
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
+#if 0
+   { dg-begin-multiline-output "" }
+ int LRE__PDF_\u202c;
+  ^~
+   { dg-end-multiline-output "" }
+#endif
+
+const char *s1 = "LRE_‪_PDF_\u202c";
+/* { dg-warning "mismatch" "" { target *-*-* } .-1 } */
+#if 0
+   { dg-begin-multiline-output "" }
+ const char *s1 = "LRE__PDF_\u202c";
+    ^~
+   { dg-end-multiline-output "" }
+#endif
diff --git a/libcpp/lex.c b/libcpp/lex.c
index 88aba307991..9e5531fb125 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -1172,11 +1172,34 @@ namespace bidi {
   /* All the UTF-8 encodings of bidi characters start with E2.  */
   constexpr uchar utf8_start = 0xe2;
 
+  struct context
+  {
+context () {}
+context (location_t loc, kind k, bool pdf, bool ucn)
+: m_loc (loc), m_kind (k), m_pdf (pdf), m_ucn (ucn)
+{
+}
+
+kind get_pop_kind () const
+{
+  return m_pdf ? kind::PDF : kind::PDI;
+}
+bool ucn_p () const
+{
+  return m_ucn;
+}
+
+location_t m_loc;
+kind m_kind;
+unsigned m_pdf : 1;
+unsigned m_ucn : 1;
+  };
+
   /* A vector hol
