date:20201210

[PATCH] sanitizer: do not ICE for pointer cmp/sub

2020-12-10 Thread Martin Liška


Hello.

In the C FE we have trouble instrumenting top-level pointer comparison
(and subtraction):

/home/marxin/Programming/gcc/gcc/testsuite/c-c++-common/asan/pr98204.c:5:1: 
internal compiler error: in pointer_diff, at c/c-typeck.c:3954
5 | static long i=((char*)&(v.c)-(char*)&v);
  | ^~

and

 gcc /home/marxin/Programming/gcc/gcc/testsuite/c-c++-common/asan/pr98204.c -c 
-fsanitize=address,pointer-compare
/home/marxin/Programming/gcc/gcc/testsuite/c-c++-common/asan/pr98204.c:6:16: 
error: initializer element is not constant
6 | static long i2=((char*)&(v.c)<(char*)&v);
  |^

The patch fixes that by skipping the instrumentation when
current_function_decl is NULL_TREE.

In contrast, C++ is fine with that and does the emission in a ctor:

$ + /home/marxin/Programming/gcc/gcc/testsuite/c-c++-common/asan/pr98204.c -c 
-fsanitize=address,pointer-compare,pointer-subtract 
-fdump-tree-gimple=/dev/stdout
...
void __static_initialization_and_destruction_0 (int __initialize_p, int 
__priority)
{
  if (__initialize_p == 1) goto ; else goto ;
  :
  __builtin___asan_before_dynamic_init 
("/home/marxin/Programming/gcc/gcc/testsuite/c-c++-common/asan/pr98204.c");
  if (__priority == 65535) goto ; else goto ;
  :
  __builtin___sanitizer_ptr_sub (&v.c, &v);
  i = 0;
  __builtin___sanitizer_ptr_cmp (&v.c, &v);

Are we able to do something similar for C FE, or are we fine with the suggested 
patch?

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/c/ChangeLog:

PR sanitizer/98204
* c-typeck.c (pointer_diff): Do not emit a top-level
sanitization.
(build_binary_op): Likewise.

gcc/testsuite/ChangeLog:

PR sanitizer/98204
* c-c++-common/asan/pr98204.c: New test.
---
 gcc/c/c-typeck.c  | 6 +++---
 gcc/testsuite/c-c++-common/asan/pr98204.c | 6 ++
 2 files changed, 9 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/asan/pr98204.c

diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index 138af073925..7d58e8de342 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -3949,10 +3949,9 @@ pointer_diff (location_t loc, tree op0, tree op1, tree 
*instrument_expr)
 pedwarn (loc, OPT_Wpointer_arith,
 "pointer to a function used in subtraction");
 
-  if (sanitize_flags_p (SANITIZE_POINTER_SUBTRACT))

+  if (current_function_decl != NULL_TREE
+  && sanitize_flags_p (SANITIZE_POINTER_SUBTRACT))
 {
-  gcc_assert (current_function_decl != NULL_TREE);
-
   op0 = save_expr (op0);
   op1 = save_expr (op1);
 
@@ -12324,6 +12323,7 @@ build_binary_op (location_t location, enum tree_code code,

}
 
   if ((code0 == POINTER_TYPE || code1 == POINTER_TYPE)

+ && current_function_decl != NULL_TREE
  && sanitize_flags_p (SANITIZE_POINTER_COMPARE))
{
  op0 = save_expr (op0);
diff --git a/gcc/testsuite/c-c++-common/asan/pr98204.c 
b/gcc/testsuite/c-c++-common/asan/pr98204.c
new file mode 100644
index 000..7532646d712
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/asan/pr98204.c
@@ -0,0 +1,6 @@
+/* PR sanitizer/98204 */
+/* { dg-options "-fsanitize=address,pointer-subtract,pointer-compare" } */
+
+struct{int c;}v;
+static long i=((char*)&(v.c)-(char*)&v);
+static long i2=((char*)&(v.c)<(char*)&v);
--
2.29.2

[PATCH] tree-optimization/98211 - fix bogus vectorization of conversion

2020-12-10 Thread Richard Biener

Pattern recog incompletely handles some bool cases, but as a result we
should merely fail to vectorize, not miscompile.  Unfortunately
vectorizable_assignment lets invalid conversions (that
vectorizable_conversion rejects) slip through.  The following
rectifies that.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

2020-12-10  Richard Biener  

PR tree-optimization/98211
* tree-vect-stmts.c (vectorizable_assignment): Disallow
invalid conversions to bool vector types.

* gcc.dg/pr98211.c: New testcase.
---
 gcc/testsuite/gcc.dg/pr98211.c | 51 ++
 gcc/tree-vect-stmts.c  | 11 
 2 files changed, 62 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr98211.c

diff --git a/gcc/testsuite/gcc.dg/pr98211.c b/gcc/testsuite/gcc.dg/pr98211.c
new file mode 100644
index 000..cea371dcee7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr98211.c
@@ -0,0 +1,51 @@
+/* { dg-do run } */
+/* { dg-options "-std=gnu90 -O3 -fgimple" } */
+
+int test_var_3;
+short arr_20[16];
+void __GIMPLE (ssa,startwith("slp"))
+test (int var_1, short int a, short int b, short int c, short int d)
+{
+  _Bool tem2;
+  _Bool tem;
+  unsigned int i_5;
+  int _24;
+  _Bool _28;
+  short int _30;
+  short int _32;
+
+  __BB(2):
+  _24 = test_var_3;
+  tem_25 = _24 != 0;
+  tem2_26 = var_1_11(D) != 0;
+  _28 = tem_25 | tem2_26;
+  _30 = _28 !=  _Literal (_Bool) 0 ? a_16(D) : b_15(D);
+  arr_20[0u] = _30;
+  _32 = _28 != _Literal (_Bool) 0 ? c_19(D) : d_18(D);
+  arr_20[8u] = _32;
+  arr_20[1u] = _30;
+  arr_20[9u] = _32;
+  arr_20[2u] = _30;
+  arr_20[10u] = _32;
+  arr_20[3u] = _30;
+  arr_20[11u] = _32;
+  arr_20[4u] = _30;
+  arr_20[12u] = _32;
+  arr_20[5u] = _30;
+  arr_20[13u] = _32;
+  arr_20[6u] = _30;
+  arr_20[14u] = _32;
+  arr_20[7u] = _30;
+  arr_20[15u] = _32;
+  return;
+}
+
+
+int
+main()
+{
+  test (1, 0x88, 0x77, 0x77, 0x88);
+  if (arr_20[0] != 0x88)
+__builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index a4980a931a9..d3ab8aa1c29 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -5123,6 +5123,17 @@ vectorizable_assignment (vec_info *vinfo,
   GET_MODE_SIZE (TYPE_MODE (vectype_in)
 return false;
 
+  if (VECTOR_BOOLEAN_TYPE_P (vectype)
+  && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
+{
+  if (dump_enabled_p ())
+   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+"can't convert between boolean and non "
+"boolean vectors %T\n", TREE_TYPE (op));
+
+  return false;
+}
+
   /* We do not handle bit-precision changes.  */
   if ((CONVERT_EXPR_CODE_P (code)
|| code == VIEW_CONVERT_EXPR)
-- 
2.26.2

[PATCH] Allow scalar fallback for pattern root stmt

2020-12-10 Thread Richard Biener

This adjusts the SLP build to allow a pattern root stmt to be
built from scalars.  I've noticed this in PR98211 where we fail
to promote a SLP subtree to a simple splat operation and instead
emit a series of uniform vector operations.  The bb-slp-div-1.c
testcase is now vectorized on x86_64, but only the store, so I
adjusted it to expect the load to be vectorized.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2020-12-10  Richard Biener  

* tree-vect-slp.c (vect_get_and_check_slp_defs): Do
not mark the defs to occur in a pattern if it is the
pattern root and record the original stmt defs in that
case.

* gcc.dg/vect/bb-slp-div-1.c: Expect the load to be
vectorized.
---
 gcc/testsuite/gcc.dg/vect/bb-slp-div-1.c |  5 -
 gcc/tree-vect-slp.c  | 15 ---
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-div-1.c 
b/gcc/testsuite/gcc.dg/vect/bb-slp-div-1.c
index 87ffc9b897b..1eea9233b70 100644
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-div-1.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-div-1.c
@@ -16,4 +16,7 @@ f (void)
   x[7] /= 9;
 }
 
-/* { dg-final { scan-tree-dump "optimized: basic block" "slp2" { xfail *-*-* } 
} } */
+/* We can vectorize the store from a CTOR built from scalar division
+   results but ideally we'd like to see vectorizing the load and the
+   division as well.  */
+/* { dg-final { scan-tree-dump "transform load" "slp2" { xfail *-*-* } } } */
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index d248ce2c3f7..e93e9c7a2d3 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -544,12 +544,21 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned 
char swap,
  continue;
}
 
-  if (def_stmt_info && is_pattern_stmt_p (def_stmt_info))
-   oprnd_info->any_pattern = true;
-
   oprnd_info->def_stmts.quick_push (def_stmt_info);
   oprnd_info->ops.quick_push (oprnd);
 
+  if (def_stmt_info
+ && is_pattern_stmt_p (def_stmt_info))
+   {
+ if (STMT_VINFO_RELATED_STMT (vect_orig_stmt (def_stmt_info))
+ != def_stmt_info)
+   oprnd_info->any_pattern = true;
+ else
+   /* If we promote this to external use the original stmt def.  */
+   oprnd_info->ops.last ()
+ = gimple_get_lhs (vect_orig_stmt (def_stmt_info)->stmt);
+   }
+
   /* If there's a extern def on a backedge make sure we can
 code-generate at the region start.
 ???  This is another case that could be fixed by adjusting
-- 
2.26.2

Re: [PATCH] Remove misleading debug line entries

2020-12-10 Thread Richard Biener

On Wed, 9 Dec 2020, Bernd Edlinger wrote:

> On 12/8/20 7:57 PM, Bernd Edlinger wrote:
> > On 12/8/20 11:35 AM, Richard Biener wrote:
> >>
> >> + {
> >> +   /* Remove a nonbind marker when the outer scope of the
> >> +  inline function is completely removed.  */
> >> +   if (gimple_debug_nonbind_marker_p (stmt)
> >> +   && BLOCK_ABSTRACT_ORIGIN (b))
> >> + {
> >> +   while (TREE_CODE (b) == BLOCK
> >> +  && !inlined_function_outer_scope_p (b))
> >> + b = BLOCK_SUPERCONTEXT (b);
> >>
> >> So given we never remove a inlined_function_outer_scope_p BLOCK from
> >> the block tree can we assert that we find such a BLOCK?  If we never
> >> elide those BLOCKs how can it happen that we elide it in the end?
> 
> We can remove inlined function outer scope when they have no subblocks
> any more, or only unused subblocks, and there is an exception from the
> rule when no debug info is generated, that is due to this:
> 
> >else if (!flag_auto_profile && debug_info_level == DINFO_LEVEL_NONE
> > && !optinfo_wants_inlining_info_p ())
> >  {
> >/* Even for -g0 don't prune outer scopes from artificial
> >   functions, otherwise diagnostics using tree_nonartificial_location
> >   will not be emitted properly.  */
> >if (inlined_function_outer_scope_p (scope))
> >  {
> >tree ao = BLOCK_ORIGIN (scope);
> >if (ao
> >&& TREE_CODE (ao) == FUNCTION_DECL
> >&& DECL_DECLARED_INLINE_P (ao)
> >&& lookup_attribute ("artificial", DECL_ATTRIBUTES (ao)))
> >  unused = false;
> >  }
> >  }
> > 
> 
> I instrumented the remove_unused_scope_block_p now as follows,
> to better understand what happens here:
> 
> diff --git a/gcc/tree-ssa-live.c b/gcc/tree-ssa-live.c
> index 9ea24a1..3dd859c 100644
> --- a/gcc/tree-ssa-live.c
> +++ b/gcc/tree-ssa-live.c
> @@ -525,9 +525,15 @@ remove_unused_scope_block_p (tree scope, bool 
> in_ctor_dtor_block)
> *t = BLOCK_SUBBLOCKS (*t);
> while (BLOCK_CHAIN (*t))
>   {
> +   gcc_assert (TREE_USED (*t));
> +   if (debug_info_level != DINFO_LEVEL_NONE)
> + gcc_assert (!inlined_function_outer_scope_p 
> (BLOCK_SUPERCONTEXT (*t)));
> BLOCK_SUPERCONTEXT (*t) = supercontext;
> t = &BLOCK_CHAIN (*t);
>   }
> +   gcc_assert (TREE_USED (*t));
> +   if (debug_info_level != DINFO_LEVEL_NONE)
> + gcc_assert (!inlined_function_outer_scope_p (BLOCK_SUPERCONTEXT 
> (*t)));
> BLOCK_CHAIN (*t) = next;
> BLOCK_SUPERCONTEXT (*t) = supercontext;
> t = &BLOCK_CHAIN (*t);
> 
> This survives a bootstrap, but I consider that just as an experiment...
> 
> This means that the BLOCK_SUPERCONTEXT pointers never skip
> an inlined_function_outer_scope_p, *except* when no debug info is
> generated, but then it is fine, as there are either no debug_nonbind_marker_p,
> or it would not matter, if an outer scope is missed.
> 
> After the above loop runs, the BLOCK_SUBBLOCKS->BLOCK_CHAIN have only
> Blocks with TREE_USED
> Blocks with !TREE_USED are removed from the SUBBLOCKS->CHAIN list, but
> have still a valid BLOCK_SUPERCONTEXT. However BLOCK_CHAIN and BLOCK_SUBBLOCKS
> are not used any more, and could theoretically misused for something, but
> fortunately that is not necessary.
> 
> I think that result suggests that the proposed patch does the right thing,
> already as-is.
> 
> 
> Do you agree?

Yes, I think that in the end with all the constraints under which we
elide blocks the patch does the correct thing.  What makes me uneasy
is how un-obvious that is ;)  That said, the comment should probably
say

  Elide debug marker stmts that have an associated BLOCK from an
  inline instance removed with also the outermost scope BLOCK of
  said inline instance removed.  If the outermost scope BLOCK of
  said inline instance is preserved use that in place of the removed
  BLOCK.  That keeps the marker associated to the correct inline
  instance (or no inline instance in case it was not from an inline
  instance).

That at least makes the intent clear while the implementation
might still not be 100% obviously correct.  That's why I suggested
to track "inline instance outermost scope BLOCK of BLOCK" in the
function that messes with the BLOCK tree.

But I guess the comment is enough to reverse-engineer that.

So OK with a comment along this line (if I got it correct, that is ;))

Thanks,
Richard.

Re: [1/2][TREE] Add WIDEN_PLUS, WIDEN_MINUS pretty print

2020-12-10 Thread Richard Biener

On Thu, 10 Dec 2020, Joel Hutton wrote:

> Hi all,
> 
> This adds missing pretty print for WIDEN_PLUS/MINUS and 
> VEC_WIDEN_PLUS/MINUS_HI/LO
> 
> Bootstrapped and regression tested all together on aarch64.
> 
> Ok for trunk?

OK.

> Add WIDEN_PLUS, WIDEN_MINUS pretty print
> 
> Add 'w+'/'w-' as WIDEN_PLUS/WIDEN_MINUS respectively.
> Add 'VEC_WIDEN_PLUS/MINUS_HI/LO<...>' for   VEC_WIDEN_PLUS/MINUS_HI/LO
> 
> gcc/ChangeLog:
> 
>  * tree-pretty-print.c (dump_generic_node): Add case for
>  VEC_WIDEN_(PLUS/MINUS)_(HI/LO)_EXPR and WIDEN_(PLUS/MINUS)_EXPR.
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Re: [2/2][VECT] pr97929 fix

2020-12-10 Thread Richard Biener

On Thu, 10 Dec 2020, Joel Hutton wrote:

> Hi all,
> 
> This patch addresses PR97929 by adding a missing case for WIDEN_PLUS/MINUS in 
> vect_get_smallest_scalar_type. It also introduces a test to check for 
> regression. 
> 
> One thing to note is that I saw a failure on c-c++-common/builtins.c which 
> disappeared when I ran the test again. I assume this is due to the test being 
> unreliable.
> 
> Bootstrapped and regression tested all together.
> 
> Ok for trunk?

OK.

Thanks,
Richard.

> [VECT] pr97929 fix
> 
> This addresses pr97929. The case for WIDEN_PLUS and WIDEN_MINUS were
> missing in vect_get_smallest_scalar_type.
> 
> gcc/ChangeLog:
> 
> PR tree-optimization/97929
> * tree-vect-data-refs.c (vect_get_smallest_scalar_type): Add
> WIDEN_PLUS/WIDEN_MINUS case.
> 
> gcc/testsuite/ChangeLog:
> 
> * gcc.dg/vect/pr97929.c: New test.

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Re: [PATCH] dojump: Fix up probabilities splitting in dojump.c comparison splitting [PR98212]

2020-12-10 Thread Eric Botcazou

> 2020-12-10  Jakub Jelinek  
> 
>   PR rtl-optimization/98212
>   * dojump.c (do_compare_rtx_and_jump): Change computation of
>   first_prob for and_them and don't invert prob around it.
> 
>   * gcc.dg/predict-8.c: Adjust expected probability.
> 
> --- gcc/dojump.c.jj   2020-12-09 15:11:17.042888002 +0100
> +++ gcc/dojump.c  2020-12-10 12:24:56.991844956 +0100
> @@ -1138,19 +1138,21 @@ do_compare_rtx_and_jump (rtx op0, rtx op
>   cprob = cprob.apply_scale (99, 100);
> else
>   cprob = profile_probability::even ();
> -   /* We want to split:
> +   /* For and_them we want to split:
>if (x) goto t; // prob;
> +  goto f;
>into
> -  if (a) goto t; // first_prob;
> +  if (a) ; else goto f; // first_prob for ;
> +// 1 - first_prob for goto 
f;
>if (b) goto t; // prob;
> +  goto f;
>such that the overall probability of jumping to t
> -  remains the same and first_prob is prob * cprob.  */
> +  remains the same and first_prob is 1 - prob * (1 - 
cprob).  */



> if (and_them)
>   {
> rtx_code_label *dest_label;
> -   prob = prob.invert ();
> -   profile_probability first_prob = prob.split 
(cprob).invert ();
> -   prob = prob.invert ();
> +   profile_probability first_prob
> + = prob.split (cprob.invert ()).invert ();
> /* If we only jump if true, just bypass the second 
jump.  */
> if (! if_false_label)
>   {
> @@ -1163,6 +1170,15 @@ do_compare_rtx_and_jump (rtx op0, rtx op
>do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
> mode, size, dest_label, NULL, first_prob);
>   }
> +   /* For !and_them we want to split:
> +  if (x) goto t; // prob;
> +  goto f;
> +  into
> +  if (a) goto t; // first_prob;
> +  if (b) goto t; // prob;
> +  goto f;
> +  such that the overall probability of jumping to t
> +  remains the same and first_prob is prob * cprob.  */
>else
>   {
> profile_probability first_prob = prob.split (cprob);

prob.split adjusts prob so this needs to be reflected in the comment (maybe 
"adjusted prob" or the formula if it is simple).  Otherwise looks good to me.

-- 
Eric Botcazou

Re: [PATCH] data-ref: Rework integer handling in split_constant_offset [PR98069]

2020-12-10 Thread Richard Biener

On Wed, 9 Dec 2020, Richard Sandiford wrote:

> PR98069 is about a case in which split_constant_offset miscategorises
> an expression of the form:
> 
>   int foo;
>   …
>   POINTER_PLUS_EXPR
> 
> as:
> 
>   base: base
>   offset: (sizetype) (-foo) * size
>   init: INT_MIN * size
> 
> “-foo” overflows when “foo” is INT_MIN, whereas the original expression
> didn't overflow in that case.
> 
> As discussed in the PR trail, we could simply ignore the fact that
> int overflow is undefined and treat it as a wrapping type, but that
> is likely to pessimise quite a few cases.
> 
> This patch instead reworks split_constant_offset so that:
> 
> - it treats integer operations as having an implicit cast to sizetype
> - for integer operations, the returned VAR has type sizetype
> 
> In other words, the problem becomes to express:
> 
>   (sizetype) (OP0 CODE OP1)
> 
> as:
> 
>   VAR:sizetype + (sizetype) OFF:ssizetype
> 
> The top-level integer split_constant_offset will (usually) be a sizetype
> POINTER_PLUS operand, so the extra cast to sizetype disappears.  But adding
> the cast allows the conversion handling to defer a lot of the difficult
> cases to the recursive split_constant_offset call, which can detect
> overflow on individual operations.
> 
> The net effect is to analyse the access above as:
> 
>   base: base
>   offset: -(sizetype) foo * size
>   init: INT_MIN * size
> 
> See the comments in the patch for more details.
> 
> Tested on aarch64-linux-gnu so far (with and without SVE), but will
> test more widely overnight.

Nice - thanks for refactoring it this way.  I've one small question
at the very end ...

> Thanks,
> Richard
> 
> 
> gcc/
>   PR tree-optimization/98069
>   * tree-data-ref.c (compute_distributive_range): New function.
>   (nop_conversion_for_offset_p): Likewise.
>   (split_constant_offset): In the internal overload, treat integer
>   expressions as having an implicit cast to sizetype and express
>   them accordingly.  Pass back the range of the original (uncast)
>   expression in a new range parameter.
>   (split_constant_offset_1): Likewise.  Rework the handling of
>   conversions to account for the implicit sizetype casts.
> ---
>  gcc/testsuite/gcc.dg/vect/pr98069.c |  22 ++
>  gcc/tree-data-ref.c | 427 +---
>  2 files changed, 352 insertions(+), 97 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr98069.c
> 
> diff --git a/gcc/testsuite/gcc.dg/vect/pr98069.c 
> b/gcc/testsuite/gcc.dg/vect/pr98069.c
> new file mode 100644
> index 000..e60549fb30a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr98069.c
> @@ -0,0 +1,22 @@
> +long long int var_3 = -166416893043554447LL;
> +short var_8 = (short)27092;
> +unsigned int var_17 = 75036300U;
> +short arr_165[23];
> +
> +static long c(long e, long f) { return f ? e : f; }
> +void __attribute((noipa)) test()
> +{
> +  for (int b = 0; b < 19; b = var_17)
> +for (int d = (int)(~c(-2147483647 - 1, var_3)) - 2147483647; d < 22; d++)
> +  arr_165[d] = var_8;
> +}
> +
> +int main()
> +{
> +  for (unsigned i_3 = 0; i_3 < 23; ++i_3)
> +arr_165[i_3] = (short)-8885;
> +  test();
> +  if (arr_165[0] != 27092)
> +__builtin_abort ();
> +  return 0;
> +}
> diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
> index e8308ce8250..926553b5cac 100644
> --- a/gcc/tree-data-ref.c
> +++ b/gcc/tree-data-ref.c
> @@ -97,6 +97,8 @@ along with GCC; see the file COPYING3.  If not see
>  #include "tree-eh.h"
>  #include "ssa.h"
>  #include "internal-fn.h"
> +#include "range-op.h"
> +#include "vr-values.h"
>  
>  static struct datadep_stats
>  {
> @@ -581,26 +583,196 @@ debug_ddrs (vec ddrs)
>dump_ddrs (stderr, ddrs);
>  }
>  
> +/* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
> +   OP0 CODE OP1, where:
> +
> +   - OP0 CODE OP1 has integral type TYPE
> +   - the range of OP0 is given by OP0_RANGE and
> +   - the range of OP1 is given by OP1_RANGE.
> +
> +   Independently of RESULT_RANGE, try to compute:
> +
> + DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
> +  - (sizetype) (OP0 CODE OP1)
> +
> +   as a constant and subtract DELTA from the ssizetype constant in *OFF.
> +   Return true on success, or false if DELTA is not known at compile time.
> +
> +   Truncation and sign changes are known to distribute over CODE, i.e.
> +
> + (itype) (A CODE B) == (itype) A CODE (itype) B
> +
> +   for any integral type ITYPE whose precision is no greater than the
> +   precision of A and B.  */
> +
> +static bool
> +compute_distributive_range (tree type, value_range &op0_range,
> + tree_code code, value_range &op1_range,
> + tree *off, value_range *result_range)
> +{
> +  gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
> +  if (result_range)
> +{
> +  range_operator *op = range_op_handler (code, type);
> +  op->fold_range (*result_range, type, op0_range, op1_

Re: [PATCH] data-ref: Rework integer handling in split_constant_offset [PR98069]

2020-12-10 Thread Richard Sandiford via Gcc-patches

Richard Biener  writes:
>> @@ -812,33 +997,80 @@ split_constant_offset_1 (tree type, tree op0, enum 
>> tree_code code, tree op1,
>>  }
>>  }
>>  
>> -/* Expresses EXP as VAR + OFF, where off is a constant.  The type of OFF
>> -   will be ssizetype.  */
>> +/* If EXP has pointer type, try to express it as:
>> +
>> + POINTER_PLUS <*VAR, (sizetype) *OFF>
>> +
>> +   where:
>> +
>> +   - *VAR has the same type as EXP
>> +   - *OFF is a constant of type ssizetype.
>> +
>> +   If EXP has an integral type, try to express (sizetype) EXP as:
>> +
>> + *VAR + (sizetype) *OFF
>> +
>> +   where:
>> +
>> +   - *VAR has type sizetype
>> +   - *OFF is a constant of type ssizetype.
>> +
>> +   If EXP_RANGE is nonnull, set it to the range of EXP.
>> +
>> +   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
>> +   visited.  LIMIT counts down the number of SSA names that we are
>> +   allowed to process before giving up.  */
>>  
>>  static void
>> -split_constant_offset (tree exp, tree *var, tree *off,
>> +split_constant_offset (tree exp, tree *var, tree *off, value_range 
>> *exp_range,
>> hash_map > &cache,
>> unsigned *limit)
>>  {
>> -  tree type = TREE_TYPE (exp), op0, op1, e, o;
>> +  tree type = TREE_TYPE (exp), op0, op1;
>>enum tree_code code;
>>  
>> -  *var = exp;
>> -  *off = ssize_int (0);
>> +  code = TREE_CODE (exp);
>> +  if (exp_range)
>> +{
>> +  *exp_range = type;
>> +  if (code == SSA_NAME)
>> +{
>> +  wide_int var_min, var_max;
>> +  value_range_kind vr_kind = get_range_info (exp, &var_min, &var_max);
>> +  wide_int var_nonzero = get_nonzero_bits (exp);
>> +  vr_kind = intersect_range_with_nonzero_bits (vr_kind,
>> +   &var_min, &var_max,
>> +   var_nonzero,
>> +   TYPE_SIGN (type));
>> +  if (vr_kind == VR_RANGE)
>> +*exp_range = value_range (type, var_min, var_max);
>> +}
>> +}
>>  
>> -  if (tree_is_chrec (exp)
>> -  || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
>> -return;
>> +  if (!tree_is_chrec (exp)
>> +  && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
>> +{
>> +  extract_ops_from_tree (exp, &code, &op0, &op1);
>> +  if (split_constant_offset_1 (type, op0, code, op1, var, off,
>> +   exp_range, cache, limit))
>> +return;
>> +}
>>  
>> -  code = TREE_CODE (exp);
>> -  extract_ops_from_tree (exp, &code, &op0, &op1);
>> -  if (split_constant_offset_1 (type, op0, code, op1, &e, &o, cache, limit))
>> +  *var = exp;
>> +  if (INTEGRAL_TYPE_P (type))
>> +*var = fold_convert (sizetype, *var);
>> +  *off = ssize_int (0);
>> +  if (exp_range && code != SSA_NAME)
>>  {
>> -  *var = e;
>> -  *off = o;
>> +  wide_int var_min, var_max;
>> +  if (determine_value_range (exp, &var_min, &var_max) == VR_RANGE)
>> +*exp_range = value_range (type, var_min, var_max);
>
> So this call is only for the case the recursion failed, otherwise
> we build exp_range during the recursive call, correct?

Yeah, that's right.

> The patch is OK.

Thanks.  For the record, now also tested on x86_64-linux-gnu.

I'm not sure what to do about backports though.  It seems a bit
invasive for GCC 8 and 9 at least (PR95396).

Richard

Re: [PATCH 1/5] arm: Auto-vectorization for MVE: vand

2020-12-10 Thread Christophe Lyon via Gcc-patches

On Tue, 8 Dec 2020 at 15:00, Kyrylo Tkachov  wrote:

>
>
> > -Original Message-
> > From: Christophe Lyon 
> > Sent: 08 December 2020 13:59
> > To: Kyrylo Tkachov 
> > Cc: gcc-patches@gcc.gnu.org
> > Subject: Re: [PATCH 1/5] arm: Auto-vectorization for MVE: vand
> >
> > On Tue, 8 Dec 2020 at 14:19, Kyrylo Tkachov 
> > wrote:
> > >
> > > Hi Christophe
> > >
> > > > -Original Message-
> > > > From: Gcc-patches  On Behalf Of
> > > > Christophe Lyon via Gcc-patches
> > > > Sent: 08 December 2020 13:06
> > > > To: gcc-patches@gcc.gnu.org
> > > > Subject: [PATCH 1/5] arm: Auto-vectorization for MVE: vand
> > > >
> > > > This patch enables MVE vandq instructions for auto-vectorization.
> MVE
> > > > vandq insns in mve.md are modified to use 'and' instead of unspec
> > > > expression to support and3.  The and3 expander is added
> > to
> > > > vec-common.md
> > > >
> > > > 2020-12-03  Christophe Lyon  
> > > >
> > > >   gcc/
> > > >   * config/arm/iterators.md (supf): Remove VANDQ_S and VANDQ_U.
> > > >   (VANQ): Remove.
> > > >   (VDQ): Add TARGET_HAVE_MVE condition where relevant.
> > > >   * config/arm/mve.md (mve_vandq_u): New entry for vand
> > > >   instruction using expression 'and'.
> > > >   (mve_vandq_s): New expander.
> > > >   (mve_vaddq_n_f): Use 'and' code instead of unspec.
> > > >   * config/arm/neon.md (and3): Rename into
> > > > and3_neon.
> > > >   * config/arm/predicates.md (imm_for_neon_inv_logic_operand):
> > > >   Enable for MVE.
> > > >   * config/arm/unspecs.md (VANDQ_S, VANDQ_U, VANDQ_F):
> > > > Remove.
> > > >   * config/arm/vec-common.md (and3): New expander.
> > > >
> > > >   gcc/testsuite/
> > > >   * gcc.target/arm/simd/mve-vand.c: New test.
> > > > ---
> > > >  gcc/config/arm/iterators.md  | 11 +++--
> > > >  gcc/config/arm/mve.md| 40 +-
> > > >  gcc/config/arm/neon.md   |  2 +-
> > > >  gcc/config/arm/predicates.md |  2 +-
> > > >  gcc/config/arm/unspecs.md|  3 --
> > > >  gcc/config/arm/vec-common.md |  8 
> > > >  gcc/testsuite/gcc.target/arm/simd/mve-vand.c | 63
> > > > 
> > > >  7 files changed, 109 insertions(+), 20 deletions(-)
> > > >  create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vand.c
> > > >
> > > > diff --git a/gcc/config/arm/iterators.md
> b/gcc/config/arm/iterators.md
> > > > index 592af35..badad2b 100644
> > > > --- a/gcc/config/arm/iterators.md
> > > > +++ b/gcc/config/arm/iterators.md
> > > > @@ -147,7 +147,12 @@ (define_mode_iterator VW [V8QI V4HI V2SI])
> > > >  (define_mode_iterator VN [V8HI V4SI V2DI])
> > > >
> > > >  ;; All supported vector modes (except singleton DImode).
> > > > -(define_mode_iterator VDQ [V8QI V16QI V4HI V8HI V2SI V4SI V4HF
> > V8HF
> > > > V2SF V4SF V2DI])
> > > > +(define_mode_iterator VDQ [(V8QI "!TARGET_HAVE_MVE") V16QI
> > > > +(V4HI "!TARGET_HAVE_MVE") V8HI
> > > > +(V2SI "!TARGET_HAVE_MVE") V4SI
> > > > +(V4HF "!TARGET_HAVE_MVE") V8HF
> > > > +(V2SF "!TARGET_HAVE_MVE") V4SF
> > > > +(V2DI "!TARGET_HAVE_MVE")])
> > > >
> > > >  ;; All supported floating-point vector modes (except V2DF).
> > > >  (define_mode_iterator VF [(V4HF "TARGET_NEON_FP16INST")
> > > > @@ -1232,8 +1237,7 @@ (define_int_attr supf [(VCVTQ_TO_F_S "s")
> > > > (VCVTQ_TO_F_U "u") (VREV16Q_S "s")
> > > >  (VADDLVQ_P_U "u") (VCMPNEQ_U "u") (VCMPNEQ_S
> "s")
> > > >  (VABDQ_M_S "s") (VABDQ_M_U "u") (VABDQ_S "s")
> > > >  (VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u")
> > > > -(VADDVQ_P_S "s") (VADDVQ_P_U "u") (VANDQ_S "s")
> > > > -(VANDQ_U "u") (VBICQ_S "s") (VBICQ_U "u")
> > > > +(VADDVQ_P_S "s") (VADDVQ_P_U "u") (VBICQ_S "s")
> > > > (VBICQ_U "u")
> > > >  (VBRSRQ_N_S "s") (VBRSRQ_N_U "u")
> > > > (VCADDQ_ROT270_S "s")
> > > >  (VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s")
> > > >  (VCMPEQQ_S "s") (VCMPEQQ_U "u")
> > > > (VCADDQ_ROT90_U "u")
> > > > @@ -1501,7 +1505,6 @@ (define_int_iterator VABDQ [VABDQ_S
> > VABDQ_U])
> > > >  (define_int_iterator VADDQ_N [VADDQ_N_S VADDQ_N_U])
> > > >  (define_int_iterator VADDVAQ [VADDVAQ_S VADDVAQ_U])
> > > >  (define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S])
> > > > -(define_int_iterator VANDQ [VANDQ_U VANDQ_S])
> > > >  (define_int_iterator VBICQ [VBICQ_S VBICQ_U])
> > > >  (define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S])
> > > >  (define_int_iterator VCADDQ_ROT270 [VCADDQ_ROT270_S
> > > > VCADDQ_ROT270_U])
> > > > diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> > > > index ecbaaa9..238c828 100644
> > > > --- a/gcc/config/arm/mve.md
> > > > +++ b/gcc/config/arm/mve.md
>

Re: [PATCH] data-ref: Rework integer handling in split_constant_offset [PR98069]

2020-12-10 Thread Richard Biener

On Thu, 10 Dec 2020, Richard Sandiford wrote:

> Richard Biener  writes:
> >> @@ -812,33 +997,80 @@ split_constant_offset_1 (tree type, tree op0, enum 
> >> tree_code code, tree op1,
> >>  }
> >>  }
> >>  
> >> -/* Expresses EXP as VAR + OFF, where off is a constant.  The type of OFF
> >> -   will be ssizetype.  */
> >> +/* If EXP has pointer type, try to express it as:
> >> +
> >> + POINTER_PLUS <*VAR, (sizetype) *OFF>
> >> +
> >> +   where:
> >> +
> >> +   - *VAR has the same type as EXP
> >> +   - *OFF is a constant of type ssizetype.
> >> +
> >> +   If EXP has an integral type, try to express (sizetype) EXP as:
> >> +
> >> + *VAR + (sizetype) *OFF
> >> +
> >> +   where:
> >> +
> >> +   - *VAR has type sizetype
> >> +   - *OFF is a constant of type ssizetype.
> >> +
> >> +   If EXP_RANGE is nonnull, set it to the range of EXP.
> >> +
> >> +   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
> >> +   visited.  LIMIT counts down the number of SSA names that we are
> >> +   allowed to process before giving up.  */
> >>  
> >>  static void
> >> -split_constant_offset (tree exp, tree *var, tree *off,
> >> +split_constant_offset (tree exp, tree *var, tree *off, value_range 
> >> *exp_range,
> >>   hash_map > &cache,
> >>   unsigned *limit)
> >>  {
> >> -  tree type = TREE_TYPE (exp), op0, op1, e, o;
> >> +  tree type = TREE_TYPE (exp), op0, op1;
> >>enum tree_code code;
> >>  
> >> -  *var = exp;
> >> -  *off = ssize_int (0);
> >> +  code = TREE_CODE (exp);
> >> +  if (exp_range)
> >> +{
> >> +  *exp_range = type;
> >> +  if (code == SSA_NAME)
> >> +  {
> >> +wide_int var_min, var_max;
> >> +value_range_kind vr_kind = get_range_info (exp, &var_min, &var_max);
> >> +wide_int var_nonzero = get_nonzero_bits (exp);
> >> +vr_kind = intersect_range_with_nonzero_bits (vr_kind,
> >> + &var_min, &var_max,
> >> + var_nonzero,
> >> + TYPE_SIGN (type));
> >> +if (vr_kind == VR_RANGE)
> >> +  *exp_range = value_range (type, var_min, var_max);
> >> +  }
> >> +}
> >>  
> >> -  if (tree_is_chrec (exp)
> >> -  || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS)
> >> -return;
> >> +  if (!tree_is_chrec (exp)
> >> +  && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
> >> +{
> >> +  extract_ops_from_tree (exp, &code, &op0, &op1);
> >> +  if (split_constant_offset_1 (type, op0, code, op1, var, off,
> >> + exp_range, cache, limit))
> >> +  return;
> >> +}
> >>  
> >> -  code = TREE_CODE (exp);
> >> -  extract_ops_from_tree (exp, &code, &op0, &op1);
> >> -  if (split_constant_offset_1 (type, op0, code, op1, &e, &o, cache, 
> >> limit))
> >> +  *var = exp;
> >> +  if (INTEGRAL_TYPE_P (type))
> >> +*var = fold_convert (sizetype, *var);
> >> +  *off = ssize_int (0);
> >> +  if (exp_range && code != SSA_NAME)
> >>  {
> >> -  *var = e;
> >> -  *off = o;
> >> +  wide_int var_min, var_max;
> >> +  if (determine_value_range (exp, &var_min, &var_max) == VR_RANGE)
> >> +  *exp_range = value_range (type, var_min, var_max);
> >
> > So this call is only for the case the recursion failed, otherwise
> > we build exp_range during the recursive call, correct?
> 
> Yeah, that's right.
> 
> > The patch is OK.
> 
> Thanks.  For the record, now also tested on x86_64-linux-gnu.
> 
> I'm not sure what to do about backports though.  It seems a bit
> invasive for GCC 8 and 9 at least (PR95396).

I'd say we try for GCC 10 and only then decide (which means GCC 8
very likely not getting it).

Richard.

[PATCH] remove obsolete conversion handling from vectorizable_assignment

2020-12-10 Thread Richard Biener

This removes an odd special case for VECTOR_BOOLEAN_TYPE_P typed
conversions from vectorizable_assignment that was obsoleted by
making all integer mode VECTOR_BOOLEAN_TYPE_P types have 1-bit
precision bool components with 605c2a393d3a2db8.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2020-12-10  Richard Biener  

* tree-vect-stmts.c (vectorizable_assignment): Remove special
allowance of VECTOR_BOOLEAN_TYPE_P conversions.
---
 gcc/tree-vect-stmts.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index d3ab8aa1c29..11737a38a56 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -5143,12 +5143,7 @@ vectorizable_assignment (vec_info *vinfo,
   /* But a conversion that does not change the bit-pattern is ok.  */
   && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
> TYPE_PRECISION (TREE_TYPE (op)))
-  && TYPE_UNSIGNED (TREE_TYPE (op)))
-  /* Conversion between boolean types of different sizes is
-a simple assignment in case their vectypes are same
-boolean vectors.  */
-  && (!VECTOR_BOOLEAN_TYPE_P (vectype)
- || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
+  && TYPE_UNSIGNED (TREE_TYPE (op))))
 {
   if (dump_enabled_p ())
 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-- 
2.26.2

Re: [PATCH] Fix up testcase.

2020-12-10 Thread Prathamesh Kulkarni via Gcc-patches

On Wed, 9 Dec 2020 at 15:52, Hongtao Liu  wrote:
>
> On Wed, Dec 9, 2020 at 5:22 PM Prathamesh Kulkarni via Gcc-patches
>  wrote:
> >
> > On Wed, 9 Dec 2020 at 00:29, sunil.k.pandey  wrote:
> > >
> > > On Linux/x86_64,
> > >
> > > 3a6e3ad38a17a03ee0139b49a0946e7b9ded1eb1 is the first bad commit
> > > commit 3a6e3ad38a17a03ee0139b49a0946e7b9ded1eb1
> > > Author: Prathamesh Kulkarni 
> > > Date:   Tue Dec 8 14:30:04 2020 +0530
> > >
> > > gimple-isel: Fold x CMP y ? -1 : 0 to x CMP y [PR97872]
> > >
> > > caused
> > >
> > > FAIL: gcc.target/i386/pr78102.c scan-assembler-times pcmpeqq 3
> > Hi,
> > This is a known issue with the patch, and discussed here:
> > https://gcc.gnu.org/pipermail/gcc/2020-December/234438.html
> > I guess Hongtao will check in a fix for that soon.
> >
>
> According to https://uops.info/table.html,
> both pcmpeqq and pcmpeqd use only port 1, so i think there's no
> performance difference between
>
> vpcmpeqq %xmm1, %xmm0, %xmm0
> vpxor %xmm1, %xmm1, %xmm1
> vpcmpeqq %xmm1, %xmm0, %xmm0
>
> and
>
> vpcmpeqq %xmm1, %xmm0, %xmm0
> vpcmpeqd %xmm1, %xmm1, %xmm1
> vpandn %xmm1, %xmm0, %xmm0
>
> So fix up testcase as below.
>
> gcc/testsuite
>
> * gcc.target/i386/i386/pr78102.c: Adjust testcase.
>
> 1 file changed, 1 insertion(+), 1 deletion(-)
> gcc/testsuite/gcc.target/i386/pr78102.c | 2 +-
>
> modified   gcc/testsuite/gcc.target/i386/pr78102.c
> @@ -1,7 +1,7 @@
>  /* PR target/78102 */
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -mno-sse4.2 -msse4.1" } */
> -/* { dg-final { scan-assembler-times "pcmpeqq" 3 } } */
> +/* { dg-final { scan-assembler-times "pcmpeq" 4 } } */
>
> Ok for trunk?
Thanks for the fix!
Just a small nit - Should it be "pcmpeqq" rather than "pcmpeq" in the
dg-final line ?

Thanks,
Prathamesh
>
>
>
> --
> BR,
> Hongtao

Re: Help with PR97872

2020-12-10 Thread Prathamesh Kulkarni via Gcc-patches

On Thu, 10 Dec 2020 at 17:11, Richard Biener  wrote:
>
> On Wed, 9 Dec 2020, Prathamesh Kulkarni wrote:
>
> > On Tue, 8 Dec 2020 at 14:36, Prathamesh Kulkarni
> >  wrote:
> > >
> > > On Mon, 7 Dec 2020 at 17:37, Hongtao Liu  wrote:
> > > >
> > > > On Mon, Dec 7, 2020 at 7:11 PM Prathamesh Kulkarni
> > > >  wrote:
> > > > >
> > > > > On Mon, 7 Dec 2020 at 16:15, Hongtao Liu  wrote:
> > > > > >
> > > > > > On Mon, Dec 7, 2020 at 5:47 PM Richard Biener  
> > > > > > wrote:
> > > > > > >
> > > > > > > On Mon, 7 Dec 2020, Prathamesh Kulkarni wrote:
> > > > > > >
> > > > > > > > On Mon, 7 Dec 2020 at 13:01, Richard Biener  
> > > > > > > > wrote:
> > > > > > > > >
> > > > > > > > > On Mon, 7 Dec 2020, Prathamesh Kulkarni wrote:
> > > > > > > > >
> > > > > > > > > > On Fri, 4 Dec 2020 at 17:18, Richard Biener 
> > > > > > > > > >  wrote:
> > > > > > > > > > >
> > > > > > > > > > > On Fri, 4 Dec 2020, Prathamesh Kulkarni wrote:
> > > > > > > > > > >
> > > > > > > > > > > > On Thu, 3 Dec 2020 at 16:35, Richard Biener 
> > > > > > > > > > > >  wrote:
> > > > > > > > > > > > >
> > > > > > > > > > > > > On Thu, 3 Dec 2020, Prathamesh Kulkarni wrote:
> > > > > > > > > > > > >
> > > > > > > > > > > > > > On Tue, 1 Dec 2020 at 16:39, Richard Biener 
> > > > > > > > > > > > > >  wrote:
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > On Tue, 1 Dec 2020, Prathamesh Kulkarni wrote:
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > Hi,
> > > > > > > > > > > > > > > > For the test mentioned in PR, I was trying to 
> > > > > > > > > > > > > > > > see if we could do
> > > > > > > > > > > > > > > > specialized expansion for vcond in target when 
> > > > > > > > > > > > > > > > operands are -1 and 0.
> > > > > > > > > > > > > > > > arm_expand_vcond gets the following operands:
> > > > > > > > > > > > > > > > (reg:V8QI 113 [ _2 ])
> > > > > > > > > > > > > > > > (reg:V8QI 117)
> > > > > > > > > > > > > > > > (reg:V8QI 118)
> > > > > > > > > > > > > > > > (lt (reg/v:V8QI 115 [ a ])
> > > > > > > > > > > > > > > > (reg/v:V8QI 116 [ b ]))
> > > > > > > > > > > > > > > > (reg/v:V8QI 115 [ a ])
> > > > > > > > > > > > > > > > (reg/v:V8QI 116 [ b ])
> > > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > > where r117 and r118 are set to vector constants 
> > > > > > > > > > > > > > > > -1 and 0 respectively.
> > > > > > > > > > > > > > > > However, I am not sure if there's a way to 
> > > > > > > > > > > > > > > > check if the register is
> > > > > > > > > > > > > > > > constant during expansion time (since we don't 
> > > > > > > > > > > > > > > > have df analysis yet) ?
> > > > > >
> > > > > > It seems to me that all you need to do is relax the predicates of 
> > > > > > op1
> > > > > > and op2 in vcondmn to accept const0_rtx and constm1_rtx. I haven't
> > > > > > debugged it, but I see that vcondmn in neon.md only accepts
> > > > > > s_register_operand.
> > > > > >
> > > > > > (define_expand "vcond"
> > > > > >   [(set (match_operand:VDQW 0 "s_register_operand")
> > > > > > (if_then_else:VDQW
> > > > > >   (match_operator 3 "comparison_operator"
> > > > > > [(match_operand:VDQW 4 "s_register_operand")
> > > > > >  (match_operand:VDQW 5 "reg_or_zero_operand")])
> > > > > >   (match_operand:VDQW 1 "s_register_operand")
> > > > > >   (match_operand:VDQW 2 "s_register_operand")))]
> > > > > >   "TARGET_NEON && (! || 
> > > > > > flag_unsafe_math_optimizations)"
> > > > > > {
> > > > > >   arm_expand_vcond (operands, mode);
> > > > > >   DONE;
> > > > > > })
> > > > > >
> > > > > > in sse.md it's defined as
> > > > > > (define_expand "vcondu"
> > > > > >   [(set (match_operand:V_512 0 "register_operand")
> > > > > > (if_then_else:V_512
> > > > > >   (match_operator 3 ""
> > > > > > [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
> > > > > >  (match_operand:VI_AVX512BW 5 "nonimmediate_operand")])
> > > > > >   (match_operand:V_512 1 "general_operand")
> > > > > >   (match_operand:V_512 2 "general_operand")))]
> > > > > >   "TARGET_AVX512F
> > > > > >&& (GET_MODE_NUNITS (mode)
> > > > > >== GET_MODE_NUNITS (mode))"
> > > > > > {
> > > > > >   bool ok = ix86_expand_int_vcond (operands);
> > > > > >   gcc_assert (ok);
> > > > > >   DONE;
> > > > > > })
> > > > > >
> > > > > > then we can get operands[1] and operands[2] as
> > > > > >
> > > > > > (gdb) p debug_rtx (operands[1])
> > > > > >  (const_vector:V16QI [
> > > > > > (const_int -1 [0x]) repeated x16
> > > > > > ])
> > > > > > (gdb) p debug_rtx (operands[2])
> > > > > > (reg:V16QI 82 [ _2 ])
> > > > > > (const_vector:V16QI [
> > > > > > (const_int 0 [0]) repeated x16
> > > > > > ])
> > > > > Hi Hongtao,
> > > > > Thanks for the suggestions!
> > > > > However IIUC from vector extensions doc page, the result of vector
> > > > > comparison is defined to be 0
> > > > > or -1, so would it be b

[PATCH] dojump, v2: Fix up probabilities splitting in dojump.c comparison splitting [PR98212]

2020-12-10 Thread Jakub Jelinek via Gcc-patches

On Thu, Dec 10, 2020 at 12:50:02PM +0100, Eric Botcazou wrote:
> prob.split adjusts prob so this needs to be reflected in the comment (maybe 
> "adjusted prob" or the formula if it is simple).  Otherwise looks good to me.

Actually, I went back to the drawing board and the patch wasn't correct.
Let's discuss the probabilities in
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98212#c4
for !and_them it looks all correct, so for
bar we split
if (a != b) goto t; // prob 90%
goto f;
into:
if (a unord b) goto t; // first_prob = prob * cprob = 90% * 1% = 0.9%
if (a ltgt b) goto t; // adjusted prob = (prob - first_prob) / (1 - first_prob) 
= (90% - 0.9%) / (1 - 0.9%) = 89.909%
and for qux we split
if (a != b) goto t; // prob 10%
goto f;
into:
if (a unord b) goto t; // first_prob = prob * cprob = 10% * 1% = 0.1%
if (a ltgt b) goto t; // adjusted prob = (prob - first_prob) / (1 - first_prob) 
= (10% - 0.1%) / (1 - 0.1%) = 9.910%
Now, the and_them cases should be probability wise exactly the same
if we swap the f and t labels, because baz
if (a == b) goto t; // prob 90%
goto f;
is equivalent to:
if (a != b) goto f; // prob 10%
goto t;
which is in qux.  This means we could expand baz as:
if (a unord b) goto f; // 0.1%
if (a ltgt b) goto f; // 9.910%
goto t;
But we don't expand it exactly that way, but instead (as the comment says)
as:
if (a ord b) ; else goto f; // first_prob as probability of ;
if (a uneq b) goto t; // adjusted prob
goto f;
So, first_prob.invert () should be 0.1% and adjusted prob should be
1 - 9.910%.
Thus, the right thing is 4 inverts:
prob = prob.invert (); // baz is equivalent to qux with swap(t, f) and thus 
inverted original prob
first_prob = prob.split (cprob.invert ()).invert ();
// cprob.invert because by doing if (cond) ; else goto f; we effectively invert 
the condition
// the second invert because first_prob is probability of ; rather than goto f
prob = prob.invert (); // lastly because adjusted prob we want is
// probability of goto t;, while the one from corresponding !and_them case
// would be if (...) goto f; goto t;

2020-12-10  Jakub Jelinek  

PR rtl-optimization/98212
* dojump.c (do_compare_rtx_and_jump): Change computation of
first_prob for and_them.  Add comment explaining and_them case.

* gcc.dg/predict-8.c: Adjust expected probability.

--- gcc/dojump.c.jj 2020-12-10 12:30:50.677948803 +0100
+++ gcc/dojump.c 2020-12-10 13:17:30.568082332 +0100
@@ -1138,19 +1138,38 @@ do_compare_rtx_and_jump (rtx op0, rtx op
cprob = cprob.apply_scale (99, 100);
  else
cprob = profile_probability::even ();
- /* We want to split:
+ /* For and_them we want to split:
 if (x) goto t; // prob;
+goto f;
 into
-if (a) goto t; // first_prob;
-if (b) goto t; // prob;
+if (a) ; else goto f; // first_prob for ;
+  // 1 - first_prob for goto f;
+if (b) goto t; // adjusted prob;
+goto f;
 such that the overall probability of jumping to t
-remains the same and first_prob is prob * cprob.  */
+remains the same.  The and_them case should be
+probability-wise equivalent to the !and_them case with
+f and t swapped and also the conditions inverted, i.e.
+if (!a) goto f;
+if (!b) goto f;
+goto t;
+where the overall probability of jumping to f is
+1 - prob (thus the first prob.invert () below).
+cprob.invert () is because the a condition is inverted,
+so if it was originally ORDERED, !a is UNORDERED and
+thus should be relative 1% rather than 99%.
+The invert () on assignment to first_prob is because
+first_prob represents the probability of fallthru,
+rather than goto f.  And the last prob.invert () is
+because the adjusted prob represents the probability of
+jumping to t rather than to f.  */
  if (and_them)
{
  rtx_code_label *dest_label;
- prob = prob.invert ();
- profile_probability first_prob = prob.split (cprob).invert ();
- prob = prob.invert ();
+ prob = prob.invert ();
+ profile_probability first_prob
+   = prob.split (cprob.invert ()).invert ();
+ prob = prob.invert ();
  /* If we only jump if true, just bypass the second jump.  */
  if (! if_false_label)
{
@@ -1163,6 +1182,15 @@ do_compare_rtx_and_jump (rtx op0, rtx op
   do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, 
mode,
   size, dest

Re: [RFC] [avr] Toolchain Integration for Testsuite Execution (avr cc0 to mode_cc0 conversion)

2020-12-10 Thread abebeos via Gcc-patches

Στις Πέμ, 10 Δεκ 2020 στις 7:42 π.μ., ο/η Dimitar Dimitrov 
έγραψε:

> On сряда, 9 декември 2020 г. 15:12:49 EET abebeos via Gcc-patches wrote:
> > Essence:
> >
> > I need a confirmation that the testsuite setup as presented in:
> >
> > https://github.com/abebeos/avr-gnu
> >
> > works fine.
> >
> > The problem with the avr target is that the testsuite cannot be run
> easily,
> > mainly because of the need for a special simulated-target setup, which
> does
> > not work for avr as documented. This led developers to a dead-end with
> > their non-cc0-avr-backends (the non-cc0 backend is needed thus avr is not
> > dropped from gcc11).
> >
> > I integrated a toolchain/testsetup to be able to run the gcc testsuite
> > against a simulated avr target.
> >
> > I then used this toolchain to test 2 different existent
> > non-cc0-avr-backends (from pipcet and saaadhu, both github).
> >
> > The result is that saaadhu's backend seems to be working 100%. It has
> > identical testsuite results with the existing (but deprecated)
> cc0-backend,
> > which means that it can be used "as-is" for inclusion in gcc11.
> >
> > Please note that I did this work in context of a bounty @ bountysouce,
> more
> > information within the issue:
> >
> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92729#c35
> Hi,
>
> I tested the trees you have given with my own AVR test setup [1]. I
> confirm
> your results:
>   - saaadhu's tree does not introduce any regressions.
>

ok

  - pipcet's tree has 142 gcc and 299 g++ regressions (although many of them
> are duplicates, e.g. same test case with different optimization
> levels).
>
> It's a bit awkward to copy gcc/config/avr into a mainline tree


Possibly a matter of preference, but when I'm insecure, I prefer low-level
ops (e.g. filesystem).


> Looking at their github history, both authors made some small changes in
> other areas.


saaadhu has one change, already in upstream:
https://github.com/saaadhu/gcc-avr-cc0/issues/1

I don't remember why I chose to ignore the 2 changes (outside
gcc/config/avr) of pipcet's.

I'll repeat the test-run later with the two files recreated.

I would have preferred to cherry-pick or apply patches.
>
[...]

 (see comment in cp-avr-*  : "#TD: nonsense script, use a direct git
checkout")

https://github.com/dinuxbg/gnupru/blob/master/testing/buildbot-avr.sh
>

Nice one, this is kind of what I was asking for within

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92729#c11

before going on to integrate an own one.

But the main thing is anyways:

" - saaadhu's tree does not introduce any regressions"

Re: [PATCH] dojump, v2: Fix up probabilities splitting in dojump.c comparison splitting [PR98212]

2020-12-10 Thread Eric Botcazou

> 2020-12-10  Jakub Jelinek  
> 
>   PR rtl-optimization/98212
>   * dojump.c (do_compare_rtx_and_jump): Change computation of
>   first_prob for and_them.  Add comment explaining and_them case.
> 
>   * gcc.dg/predict-8.c: Adjust expected probability.
> 
> --- gcc/dojump.c.jj   2020-12-10 12:30:50.677948803 +0100
> +++ gcc/dojump.c  2020-12-10 13:17:30.568082332 +0100
> @@ -1138,19 +1138,38 @@ do_compare_rtx_and_jump (rtx op0, rtx op
>   cprob = cprob.apply_scale (99, 100);
> else
>   cprob = profile_probability::even ();
> -   /* We want to split:
> +   /* For and_them we want to split:
>if (x) goto t; // prob;
> +  goto f;
>into
> -  if (a) goto t; // first_prob;
> -  if (b) goto t; // prob;
> +  if (a) ; else goto f; // first_prob for ;
> +// 1 - first_prob for goto 
f;
> +  if (b) goto t; // adjusted prob;
> +  goto f;
>such that the overall probability of jumping to t
> -  remains the same and first_prob is prob * cprob.  */
> +  remains the same.  The and_them case should be
> +  probability-wise equivalent to the !and_them case with
> +  f and t swapped and also the conditions inverted, i.e.
> +  if (!a) goto f;
> +  if (!b) goto f;
> +  goto t;
> +  where the overall probability of jumping to f is
> +  1 - prob (thus the first prob.invert () below).
> +  cprob.invert () is because the a condition is inverted,
> +  so if it was originally ORDERED, !a is UNORDERED and
> +  thus should be relative 1% rather than 99%.
> +  The invert () on assignment to first_prob is because
> +  first_prob represents the probability of fallthru,
> +  rather than goto f.  And the last prob.invert () is
> +  because the adjusted prob represents the probability of
> +  jumping to t rather than to f.  */
> if (and_them)
>   {
> rtx_code_label *dest_label;
> -   prob = prob.invert ();
> -   profile_probability first_prob = prob.split 
(cprob).invert ();
> -   prob = prob.invert ();
> +   prob = prob.invert ();
> +   profile_probability first_prob
> + = prob.split (cprob.invert ()).invert ();
> +   prob = prob.invert ();
> /* If we only jump if true, just bypass the second 
jump.  */
> if (! if_false_label)
>   {

OK, thanks, but aren't there missing TABs in the new version?  Only one line 
is changed in the end AFAICS.

-- 
Eric Botcazou

[backport gcc-10][AArch64] ACLE bf16 convert

2020-12-10 Thread Dennis Zhang via Gcc-patches

Hi all,

This patch backports the commit f7d6961126a7f06c8089d8a58bd21be43bc16806.
The original is approved at 
https://gcc.gnu.org/pipermail/gcc-patches/2020-November/557859.html
The only change is to remove FPCR-reading flags for builtin definition since 
it's not supported in gcc-10.
Regtested and bootstrapped for aarch64-none-linux-gnu.

Is it OK to backport?

Cheers
Dennis

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index ba2bda26dcdd4947dc724851433451433d378724..7192f3954d311d89064707cfcb735efad4377c12 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -728,3 +728,8 @@
   VAR1 (UNOP, bfcvtn_q, 0, v8bf)
   VAR1 (BINOP, bfcvtn2, 0, v8bf)
   VAR1 (UNOP, bfcvt, 0, bf)
+
+  /* Implemented by aarch64_{v}bfcvt{_high}.  */
+  VAR2 (UNOP, vbfcvt, 0, v4bf, v8bf)
+  VAR1 (UNOP, vbfcvt_high, 0, v8bf)
+  VAR1 (UNOP, bfcvt, 0, sf)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 9f0e2bd1e6ff5246f84e919402c687687a84beb8..2e8aa668b107f039e4958b6998da180a6d11b881 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7238,3 +7238,31 @@
   "bfcvt\\t%h0, %s1"
   [(set_attr "type" "f_cvt")]
 )
+
+;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
+(define_insn "aarch64_vbfcvt"
+  [(set (match_operand:V4SF 0 "register_operand" "=w")
+	(unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
+		  UNSPEC_BFCVTN))]
+  "TARGET_BF16_SIMD"
+  "shll\\t%0.4s, %1.4h, #16"
+  [(set_attr "type" "neon_shift_imm_long")]
+)
+
+(define_insn "aarch64_vbfcvt_highv8bf"
+  [(set (match_operand:V4SF 0 "register_operand" "=w")
+	(unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
+		  UNSPEC_BFCVTN2))]
+  "TARGET_BF16_SIMD"
+  "shll2\\t%0.4s, %1.8h, #16"
+  [(set_attr "type" "neon_shift_imm_long")]
+)
+
+(define_insn "aarch64_bfcvtsf"
+  [(set (match_operand:SF 0 "register_operand" "=w")
+	(unspec:SF [(match_operand:BF 1 "register_operand" "w")]
+		UNSPEC_BFCVT))]
+  "TARGET_BF16_FP"
+  "shl\\t%d0, %d1, #16"
+  [(set_attr "type" "neon_shift_imm")]
+)
diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h
index 984875dcc014300c489209c11abf41b1c47b7fbe..881615498d3d52662d7ebb3ab1e8d52d5a40cab8 100644
--- a/gcc/config/aarch64/arm_bf16.h
+++ b/gcc/config/aarch64/arm_bf16.h
@@ -40,6 +40,13 @@ vcvth_bf16_f32 (float32_t __a)
   return __builtin_aarch64_bfcvtbf (__a);
 }
 
+__extension__ extern __inline float32_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvtah_f32_bf16 (bfloat16_t __a)
+{
+  return __builtin_aarch64_bfcvtsf (__a);
+}
+
 #pragma GCC pop_options
 
 #endif
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 95bfa5ebba21b739ee3c84e3971337646f8881d4..69cccd3278642814f3961c5bf52be5639f5ef3f3 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -35680,6 +35680,27 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b,
   return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index);
 }
 
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvt_f32_bf16 (bfloat16x4_t __a)
+{
+  return __builtin_aarch64_vbfcvtv4bf (__a);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvtq_low_f32_bf16 (bfloat16x8_t __a)
+{
+  return __builtin_aarch64_vbfcvtv8bf (__a);
+}
+
+__extension__ extern __inline float32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vcvtq_high_f32_bf16 (bfloat16x8_t __a)
+{
+  return __builtin_aarch64_vbfcvt_highv8bf (__a);
+}
+
 __extension__ extern __inline bfloat16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvt_bf16_f32 (float32x4_t __a)
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
index bbea630b1820d578bdf1619834f29b919f5c3f32..47af7c494d9b9d1f4b63e802efc293348a40e270 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c
@@ -46,3 +46,43 @@ bfloat16_t test_bfcvt (float32_t a)
 {
   return vcvth_bf16_f32 (a);
 }
+
+/*
+**test_vcvt_f32_bf16:
+** shll	v0.4s, v0.4h, #16
+** ret
+*/
+float32x4_t test_vcvt_f32_bf16 (bfloat16x4_t a)
+{
+  return vcvt_f32_bf16 (a);
+}
+
+/*
+**test_vcvtq_low_f32_bf16:
+** shll	v0.4s, v0.4h, #16
+** ret
+*/
+float32x4_t test_vcvtq_low_f32_bf16 (bfloat16x8_t a)
+{
+  return vcvtq_low_f32_bf16 (a);
+}
+
+/*
+**test_vcvtq_high_f32_bf16:
+** shll2	v0.4s, v0.8h, #16
+** ret
+*/
+float32x4_t test_vcvtq_high_f32_bf16 (bfloat16x8_t a)
+{
+  return vcvtq_high_f32_bf16 (a);
+}
+
+/*
+**test_vcvtah_f32_bf16:
+** shl	d0, d0, #16

[backport gcc-10][AArch64] ACLE bf16 get

2020-12-10 Thread Dennis Zhang via Gcc-patches

Hi all,

This patch backports the commit 3553c658533e430b232997bdfd97faf6606fb102.
The original is approved at 
https://gcc.gnu.org/pipermail/gcc-patches/2020-November/557871.html
There is a change to remove FPCR-reading flag for builtin declaration since 
it's not supported in gcc-10.

Another change is to remove a test (bf16_get-be.c) that fails compiling on 
aarch64-none-linux-gnu in the original patch.
This is reported at 
https://gcc.gnu.org/pipermail/gcc-patches/2020-November/558195.html
The failure happens for several bf16 big-endian tests so the bug would be fixed 
in a separate patch.
And the test should be added after the bug is fixed.

Is it OK to backport?

Cheers
Dennis

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index ba2bda26dcdd4947dc724851433451433d378724..05726db1f6137f9ab29fcdd51f804199e24bbfcf 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -718,6 +718,10 @@
   VAR1 (QUADOP_LANE, bfmlalb_lane_q, 0, v4sf)
   VAR1 (QUADOP_LANE, bfmlalt_lane_q, 0, v4sf)
 
+  /* Implemented by aarch64_vget_lo/hi_halfv8bf.  */
+  VAR1 (UNOP, vget_lo_half, 0, v8bf)
+  VAR1 (UNOP, vget_hi_half, 0, v8bf)
+
   /* Implemented by aarch64_simd_mmlav16qi.  */
   VAR1 (TERNOP, simd_smmla, 0, v16qi)
   VAR1 (TERNOPU, simd_ummla, 0, v16qi)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 9f0e2bd1e6ff5246f84e919402c687687a84beb8..43ac3cd40fe8379567b7a60772f360d37818e8e9 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7159,6 +7159,27 @@
   [(set_attr "type" "neon_dot")]
 )
 
+;; vget_low/high_bf16
+(define_expand "aarch64_vget_lo_halfv8bf"
+  [(match_operand:V4BF 0 "register_operand")
+   (match_operand:V8BF 1 "register_operand")]
+  "TARGET_BF16_SIMD"
+{
+  rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, false);
+  emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
+  DONE;
+})
+
+(define_expand "aarch64_vget_hi_halfv8bf"
+  [(match_operand:V4BF 0 "register_operand")
+   (match_operand:V8BF 1 "register_operand")]
+  "TARGET_BF16_SIMD"
+{
+  rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true);
+  emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
+  DONE;
+})
+
 ;; bfmmla
 (define_insn "aarch64_bfmmlaqv4sf"
   [(set (match_operand:V4SF 0 "register_operand" "=w")
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 95bfa5ebba21b739ee3c84e3971337646f8881d4..0fd78a6fd076f788d2618c492a026246e61e438c 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -35680,6 +35680,20 @@ vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b,
   return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index);
 }
 
+__extension__ extern __inline bfloat16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vget_low_bf16 (bfloat16x8_t __a)
+{
+  return __builtin_aarch64_vget_lo_halfv8bf (__a);
+}
+
+__extension__ extern __inline bfloat16x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vget_high_bf16 (bfloat16x8_t __a)
+{
+  return __builtin_aarch64_vget_hi_halfv8bf (__a);
+}
+
 __extension__ extern __inline bfloat16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vcvt_bf16_f32 (float32x4_t __a)
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_get.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_get.c
new file mode 100644
index 0000000000000000000000000000000000000000..2193753ffbb6246aa16eb5033559b21266a556a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_get.c
@@ -0,0 +1,27 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-save-temps" } */
+/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
+
+#include <arm_neon.h>
+
+/*
+**test_vget_low_bf16:
+** ret
+*/
+bfloat16x4_t test_vget_low_bf16 (bfloat16x8_t a)
+{
+  return vget_low_bf16 (a);
+}
+
+/*
+**test_vget_high_bf16:
+** dup	d0, v0.d\[1\]
+** ret
+*/
+bfloat16x4_t test_vget_high_bf16 (bfloat16x8_t a)
+{
+  return vget_high_bf16 (a);
+}

Re: [PATCH] [WIP] openmp: Add OpenMP 5.0 task detach clause support

2020-12-10 Thread Jakub Jelinek via Gcc-patches

On Wed, Dec 09, 2020 at 05:37:24PM +0000, Kwok Cheung Yeung wrote:
> --- a/gcc/c/c-typeck.c
> +++ b/gcc/c/c-typeck.c
> @@ -14942,6 +14942,11 @@ c_finish_omp_clauses (tree clauses, enum 
> c_omp_region_type ort)
> pc = &OMP_CLAUSE_CHAIN (c);
> continue;
>  
> + case OMP_CLAUSE_DETACH:
> +   t = OMP_CLAUSE_DECL (c);
> +   pc = &OMP_CLAUSE_CHAIN (c);
> +   continue;
> +

If you didn't need to do anything for C for the detach clause, you would
just add:
case OMP_CLAUSE_DETACH:
at the end of the case list that starts below:
>   case OMP_CLAUSE_IF:
>   case OMP_CLAUSE_NUM_THREADS:
>   case OMP_CLAUSE_NUM_TEAMS:

But you actually do need to do something, even for C.

There are two restrictions:
- At most one detach clause can appear on the directive.
- If a detach clause appears on the directive, then a mergeable clause cannot 
appear on the same directive.
that should be checked and diagnosed.  One place to do that would be
like usually in all the FEs separately, that would mean adding
  bool mergeable_seen = false, detach_seen = false;
vars and for those clauses setting the *_seen, plus for DETACH
already complain if detach_seen is already true and remove the clause.
And at the end of the loop if mergeable_seen && detach_seen, diagnose
and remove one of them (perhaps better detach clause).
There is the optional second loop that can be used for the removal...

Testcase coverage should include:
  #pragma omp task detach (x) detach (y)
as well as
  #pragma omp task mergeable detach (x)
and
  #pragma omp task detach (x) mergeable
(and likewise for Fortran).

> +  if (cp_lexer_next_token_is_not (parser->lexer, CPP_NAME))
> +{
> +  cp_parser_error (parser, "expected identifier");
> +  return list;
> +}
> +
> +  location_t id_loc = cp_lexer_peek_token (parser->lexer)->location;
> +  tree t, identifier = cp_parser_identifier (parser);
> +
> +  if (identifier == error_mark_node)
> +t = error_mark_node;
> +  else
> +{
> +  t = cp_parser_lookup_name_simple
> + (parser, identifier,
> +  cp_lexer_peek_token (parser->lexer)->location);
> +  if (t == error_mark_node)
> + cp_parser_name_lookup_error (parser, identifier, t, NLE_NULL,
> +  id_loc);

The above doesn't match what cp_parser_omp_var_list_no_open does,
in particular it should use cp_parser_id_expression
instead of cp_parser_identifier etc.

> +  else
> + {
> +   tree type = TYPE_MAIN_VARIANT (TREE_TYPE (t));
> +   if (!INTEGRAL_TYPE_P (type)
> +   || TREE_CODE (type) != ENUMERAL_TYPE
> +   || DECL_NAME (TYPE_NAME (type))
> +!= get_identifier ("omp_event_handle_t"))
> + {
> +   error_at (id_loc, "% clause event handle "
> + "has type %qT rather than "
> + "%",
> + type);
> +   return list;

You can't do this here for C++, it needs to be done in finish_omp_clauses
instead and only be done if the type is not a dependent type.
Consider (e.g. should be in testsuite)
template <typename T>
void
foo ()
{
  T t;
  #pragma omp task detach (t)
  ;
}

template <typename T>
void
bar ()
{
  T t;
  #pragma omp task detach (t)
  ;
}

void
baz ()
{
  foo <omp_event_handle_t> ();
  bar <int> (); // Instantiating this should error
}

> @@ -7394,6 +7394,9 @@ finish_omp_clauses (tree clauses, enum 
> c_omp_region_type ort)
>   }
>   }
> break;
> + case OMP_CLAUSE_DETACH:
> +   t = OMP_CLAUSE_DECL (c);
> +   break;
>  

Again, restriction checking here, plus check the type if it is
non-dependent, otherwise defer that checking for finish_omp_clauses when
it will not be dependent anymore.

I think you need to handle OMP_CLAUSE_DETACH in cp/pt.c too.

> --- a/gcc/gimplify.c
> +++ b/gcc/gimplify.c
> @@ -9733,6 +9733,19 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq 
> *pre_p,
>   }
> break;
>  
> + case OMP_CLAUSE_DETACH:
> +   decl = OMP_CLAUSE_DECL (c);
> +   if (outer_ctx)
> + {
> +   splay_tree_node on
> + = splay_tree_lookup (outer_ctx->variables,
> +  (splay_tree_key)decl);
> +   if (on == NULL || (on->value & GOVD_DATA_SHARE_CLASS) == 0)
> + omp_firstprivatize_variable (outer_ctx, decl);
> +   omp_notice_variable (outer_ctx, decl, true);
> + }
> +   break;

I don't understand this.  My reading of:
"The event-handle will be considered as if it was specified on a
firstprivate clause. The use of a variable in a detach clause expression of a 
task
construct causes an implicit reference to the variable in all enclosing
constructs."
is that we should do:
  case OMP_CLAUSE_DETACH:
decl = OMP_CLAUSE_DECL (c);
goto do_notice;
which does the second sentence, and for the first sentence I believe it
talks about the task construct rather than about the outer construct.
So (aga

Re: [PATCH] dojump, v2: Fix up probabilities splitting in dojump.c comparison splitting [PR98212]

2020-12-10 Thread Jakub Jelinek via Gcc-patches

On Thu, Dec 10, 2020 at 03:18:54PM +0100, Eric Botcazou wrote:
> OK, thanks, but aren't there missing TABs in the new version?  Only one line 
> is changed in the end AFAICS.

No, sorry, that was just me hand-editing the patch (and the source) instead of
regenerating the patch (I previously had prob.invert (); there instead of
prob = prob.invert ();).

Here is the updated patch that shows that only the 1 -> 2 lines changed
apart from the comments.

2020-12-10  Jakub Jelinek  

PR rtl-optimization/98212
* dojump.c (do_compare_rtx_and_jump): Change computation of
first_prob for and_them.  Add comment explaining and_them case.

* gcc.dg/predict-8.c: Adjust expected probability.

--- gcc/dojump.c.jj 2020-12-10 12:30:50.677948803 +0100
+++ gcc/dojump.c 2020-12-10 15:40:01.577251502 +0100
@@ -1138,18 +1138,37 @@ do_compare_rtx_and_jump (rtx op0, rtx op
cprob = cprob.apply_scale (99, 100);
  else
cprob = profile_probability::even ();
- /* We want to split:
+ /* For and_them we want to split:
 if (x) goto t; // prob;
+goto f;
 into
-if (a) goto t; // first_prob;
-if (b) goto t; // prob;
+if (a) ; else goto f; // first_prob for ;
+  // 1 - first_prob for goto f;
+if (b) goto t; // adjusted prob;
+goto f;
 such that the overall probability of jumping to t
-remains the same and first_prob is prob * cprob.  */
+remains the same.  The and_them case should be
+probability-wise equivalent to the !and_them case with
+f and t swapped and also the conditions inverted, i.e.
+if (!a) goto f;
+if (!b) goto f;
+goto t;
+where the overall probability of jumping to f is
+1 - prob (thus the first prob.invert () below).
+cprob.invert () is because the a condition is inverted,
+so if it was originally ORDERED, !a is UNORDERED and
+thus should be relative 1% rather than 99%.
+The invert () on assignment to first_prob is because
+first_prob represents the probability of fallthru,
+rather than goto f.  And the last prob.invert () is
+because the adjusted prob represents the probability of
+jumping to t rather than to f.  */
  if (and_them)
{
  rtx_code_label *dest_label;
  prob = prob.invert ();
- profile_probability first_prob = prob.split (cprob).invert ();
+ profile_probability first_prob
+   = prob.split (cprob.invert ()).invert ();
  prob = prob.invert ();
  /* If we only jump if true, just bypass the second jump.  */
  if (! if_false_label)
@@ -1163,6 +1182,15 @@ do_compare_rtx_and_jump (rtx op0, rtx op
   do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, 
mode,
   size, dest_label, NULL, first_prob);
}
+ /* For !and_them we want to split:
+if (x) goto t; // prob;
+goto f;
+into
+if (a) goto t; // first_prob;
+if (b) goto t; // adjusted prob;
+goto f;
+such that the overall probability of jumping to t
+remains the same and first_prob is prob * cprob.  */
   else
{
  profile_probability first_prob = prob.split (cprob);
--- gcc/testsuite/gcc.dg/predict-8.c.jj 2020-12-10 12:30:50.677948803 +0100
+++ gcc/testsuite/gcc.dg/predict-8.c2020-12-10 12:31:06.639772968 +0100
@@ -8,4 +8,4 @@ int foo(float a, float b) {
 return 2;
 }
 
-/* { dg-final { scan-rtl-dump-times "65.\[34]. .guessed" 2 "expand"} } */
+/* { dg-final { scan-rtl-dump-times "99.\[678]. .guessed" 2 "expand"} } */


Jakub

Small fix to PLACEHOLDER_EXPR handling in loc_list_from_tree_1

2020-12-10 Thread Eric Botcazou

This case handles the discriminated record types of Ada: the PLACEHOLDER_EXPR 
is the "template" expression for the discriminant in the type definition. Now 
for some components, typically arrays whose upper bound is the discriminant, 
the compiler creates a local subtype for the component, so the code needs to 
be able to deal with this nested type.

Tested on x86-64/Linux, applied on the mainline as obvious.


2020-12-10  Eric Botcazou  

* dwarf2out.c (loc_list_from_tree_1) <PLACEHOLDER_EXPR>: Deal with
a nested context type

-- 
Eric Botcazou
diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index 7b340baf422..0baa056447c 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -18245,10 +18245,11 @@ loc_list_from_tree_1 (tree loc, int want_address,
 case PLACEHOLDER_EXPR:
   /* This case involves extracting fields from an object to determine the
 	 position of other fields. It is supposed to appear only as the first
- operand of COMPONENT_REF nodes and to reference precisely the type
- that the context allows.  */
+	 operand of COMPONENT_REF nodes and to reference precisely the type
+	 that the context allows or its enclosing type.  */
   if (context != NULL
-  && TREE_TYPE (loc) == context->context_type
+	  && (TREE_TYPE (loc) == context->context_type
+	  || TREE_TYPE (loc) == TYPE_CONTEXT (context->context_type))
 	  && want_address >= 1)
 	{
 	  if (dwarf_version >= 3 || !dwarf_strict)

Re: [PATCH] dojump, v2: Fix up probabilities splitting in dojump.c comparison splitting [PR98212]

2020-12-10 Thread Eric Botcazou

> 2020-12-10  Jakub Jelinek  
> 
>   PR rtl-optimization/98212
>   * dojump.c (do_compare_rtx_and_jump): Change computation of
>   first_prob for and_them.  Add comment explaining and_them case.
> 
>   * gcc.dg/predict-8.c: Adjust expected probability.

OK, thanks.

-- 
Eric Botcazou

c++: name-lookup refactoring

2020-12-10 Thread Nathan Sidwell


Here are some refactorings to the name-lookup machinery.  Primarily
breaking out worker functions that the modules patch will also use,
and fixing a couple of comments on the way.

gcc/cp/
* name-lookup.c (pop_local_binding): Check for IDENTIFIER_ANON_P.
(update_binding): Level may be null, don't add namespaces to
level.
(newbinding_bookkeeping): New, broken out of ...
(do_pushdecl): ... here, call it.  Don't push anonymous decls.
(pushdecl, add_using_namespace): Correct comments.
(do_push_nested_namespace): Remove assert.
(make_namespace, make_namespace_finish): New, broken out of ...
(push_namespace): ... here.  Call them.  Add namespace to level
here.

pushing to trunk

nathan
--
Nathan Sidwell
diff --git i/gcc/cp/name-lookup.c w/gcc/cp/name-lookup.c
index fa372810349..051ef0b36b1 100644
--- i/gcc/cp/name-lookup.c
+++ w/gcc/cp/name-lookup.c
@@ -1916,7 +1916,7 @@ push_binding (tree id, tree decl, cp_binding_level* level)
 void
 pop_local_binding (tree id, tree decl)
 {
-  if (id == NULL_TREE)
+  if (!id || IDENTIFIER_ANON_P (id))
 /* It's easiest to write the loops that call this function without
checking whether or not the entities involved have names.  We
get here for such an entity.  */
@@ -2266,8 +2266,9 @@ update_binding (cp_binding_level *level, cxx_binding *binding, tree *slot,
   tree to_type = old_type;
   bool local_overload = false;
 
-  gcc_assert (level->kind == sk_namespace ? !binding
+  gcc_assert (!level || level->kind == sk_namespace ? !binding
 	  : level->kind != sk_class && !slot);
+
   if (old == error_mark_node)
 old = NULL_TREE;
 
@@ -2343,7 +2344,7 @@ update_binding (cp_binding_level *level, cxx_binding *binding, tree *slot,
 	warning (OPT_Wshadow, "%q#D hides constructor for %q#D",
 		 decl, to_type);
 
-  local_overload = old && level->kind != sk_namespace;
+  local_overload = old && level && level->kind != sk_namespace;
   to_val = ovl_insert (decl, old, -int (hiding));
 }
   else if (old)
@@ -2354,11 +2355,8 @@ update_binding (cp_binding_level *level, cxx_binding *binding, tree *slot,
   else if (TREE_CODE (old) == TYPE_DECL)
 	{
 	  if (same_type_p (TREE_TYPE (old), TREE_TYPE (decl)))
-	{
-	  /* Two type decls to the same type.  Do nothing.  */
-	  gcc_checking_assert (!hiding);
-	  return old;
-	}
+	/* Two type decls to the same type.  Do nothing.  */
+	return old;
 	  else
 	goto conflict;
 	}
@@ -2370,7 +2368,7 @@ update_binding (cp_binding_level *level, cxx_binding *binding, tree *slot,
 	goto conflict;
 
 	  /* The new one must be an alias at this point.  */
-	  gcc_assert (DECL_NAMESPACE_ALIAS (decl) && !hiding);
+	  gcc_assert (DECL_NAMESPACE_ALIAS (decl));
 	  return old;
 	}
   else if (TREE_CODE (old) == VAR_DECL)
@@ -2405,7 +2403,11 @@ update_binding (cp_binding_level *level, cxx_binding *binding, tree *slot,
 	  gcc_checking_assert (binding->value && OVL_P (binding->value));
 	  update_local_overload (binding, to_val);
 	}
-  else
+  else if (level
+	   && !(TREE_CODE (decl) == NAMESPACE_DECL
+		&& !DECL_NAMESPACE_ALIAS (decl)))
+	/* Don't add namespaces here.  They're done in
+	   push_namespace.  */
 	add_decl_to_level (level, decl);
 
   if (slot)
@@ -2911,6 +2913,41 @@ push_local_extern_decl_alias (tree decl)
   DECL_LOCAL_DECL_ALIAS (decl) = alias;
 }
 
+/* DECL has just been bound at LEVEL.  finish up the bookkeeping.  */
+
+static void
+newbinding_bookkeeping (tree name, tree decl, cp_binding_level *level)
+{
+  if (TREE_CODE (decl) == TYPE_DECL)
+{
+  tree type = TREE_TYPE (decl);
+
+  if (type != error_mark_node)
+	{
+	  if (TYPE_NAME (type) != decl)
+	set_underlying_type (decl);
+
+	  set_identifier_type_value_with_scope (name, decl, level);
+
+	  if (level->kind != sk_namespace
+	  && !instantiating_current_function_p ())
+	/* This is a locally defined typedef in a function that
+	   is not a template instantation, record it to implement
+	   -Wunused-local-typedefs.  */
+	record_locally_defined_typedef (decl);
+	}
+}
+  else
+{
+  if (VAR_P (decl) && !DECL_LOCAL_DECL_P (decl))
+	maybe_register_incomplete_var (decl);
+
+  if (VAR_OR_FUNCTION_DECL_P (decl)
+	  && DECL_EXTERN_C_P (decl))
+	check_extern_c_conflict (decl);
+}
+}
+
 /* Record DECL as belonging to the current lexical scope.  Check for
errors (such as an incompatible declaration for the same name
already seen in the same scope).  IS_FRIEND is true if DECL is
@@ -2939,7 +2976,7 @@ do_pushdecl (tree decl, bool hiding)
   /* An anonymous namespace has a NULL DECL_NAME, but we still want to
  insert it.  Other NULL-named decls, not so much.  */
   tree name = DECL_NAME (decl);
-  if (name || TREE_CODE (decl) == NAMESPACE_DECL)
+  if (name ? !IDENTIFIER_ANON_P (name) : TREE_CODE (decl) == NAMESPACE_DECL)
 {
   cxx_binding *binding = N

[pushed] c++: Add fixed test [PR68451]

2020-12-10 Thread Marek Polacek via Gcc-patches

I was about to add this test with dg-ice but it turned out it had
already been fixed by the recent r11-3361!

Tested x86_64-pc-linux-gnu, applying to trunk.

gcc/testsuite/ChangeLog:

PR c++/68451
* g++.dg/cpp0x/friend6.C: New test.
---
 gcc/testsuite/g++.dg/cpp0x/friend6.C | 23 +++
 1 file changed, 23 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/friend6.C

diff --git a/gcc/testsuite/g++.dg/cpp0x/friend6.C 
b/gcc/testsuite/g++.dg/cpp0x/friend6.C
new file mode 100644
index 000..fce7e55383e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/friend6.C
@@ -0,0 +1,23 @@
+// PR c++/68451
+// { dg-do compile { target c++11 } }
+
+struct A {};
+
+struct B
+{
+A a;
+friend decltype(a);
+};
+
+template 
+struct C
+{
+A a;
+friend decltype(a);
+};
+
+int main()
+{
+B b;
+C c;
+}

base-commit: 237a8e8d5647e45aca2736906033291d49545a2c
-- 
2.29.2

[PATCH] varasm, v2: Reject soft frame or arg pointer registers for register vars [PR92469]

2020-12-10 Thread Jakub Jelinek via Gcc-patches

On Thu, Dec 10, 2020 at 12:00:17PM +0100, Jakub Jelinek wrote:
> So, would it be better to check for one of FRAME_POINTER_REGNUM,
> ARG_POINTER_REGNUM or RETURN_ADDRESS_POINTER_REGNUM if they
> are mentioned in (from part of pairs in) ELIMINABLE_REGS?

In patch form now:

2020-12-10  Jakub Jelinek  

PR target/92469
* varasm.c (eliminable_regno_p): New function.
(make_decl_rtl): Reject asm vars for frame and argp
if they are different from hard frame pointer.

* gcc.target/i386/pr92469.c: New test.
* gcc.target/i386/pr79804.c: Adjust expected diagnostics.
* gcc.target/i386/pr88178.c: Expect an error.

--- gcc/varasm.c.jj 2020-12-04 10:53:56.314043883 +0100
+++ gcc/varasm.c2020-12-10 16:02:48.273074206 +0100
@@ -1370,6 +1370,23 @@ ultimate_transparent_alias_target (tree
   return target;
 }
 
+/* Return true if REGNUM is mentioned in ELIMINABLE_REGS as a from
+   register number.  */
+
+static bool
+eliminable_regno_p (int regnum)
+{
+  static const struct
+  {
+const int from;
+const int to;
+  } eliminables[] = ELIMINABLE_REGS;
+  for (size_t i = 0; i < ARRAY_SIZE (eliminables); i++)
+if (regnum == eliminables[i].from)
+  return true;
+  return false;
+}
+
 /* Create the DECL_RTL for a VAR_DECL or FUNCTION_DECL.  DECL should
have static storage duration.  In other words, it should not be an
automatic variable, including PARM_DECLs.
@@ -1472,6 +1489,15 @@ make_decl_rtl (tree decl)
   else if (!targetm.hard_regno_mode_ok (reg_number, mode))
error ("register specified for %q+D isn%'t suitable for data type",
decl);
+  else if (reg_number != HARD_FRAME_POINTER_REGNUM
+  && (reg_number == FRAME_POINTER_REGNUM
+#ifdef RETURN_ADDRESS_POINTER_REGNUM
+  || reg_number == RETURN_ADDRESS_POINTER_REGNUM
+#endif
+  || reg_number == ARG_POINTER_REGNUM)
+  && eliminable_regno_p (reg_number))
+   error ("register specified for %q+D is an internal GCC "
+  "implementation detail", decl);
   /* Now handle properly declared static register variables.  */
   else
{
--- gcc/testsuite/gcc.target/i386/pr92469.c.jj  2020-12-09 13:41:50.497501433 
+0100
+++ gcc/testsuite/gcc.target/i386/pr92469.c 2020-12-09 13:41:30.416724986 
+0100
@@ -0,0 +1,24 @@
+/* PR target/92469 */
+/* { dg-do compile } */
+/* { dg-options "-O0" } */
+
+void
+foo (void)
+{ 
+  register int x asm ("frame");/* { dg-error "register specified for 
'x' is an internal GCC implementation detail" } */
+  int y = x;
+}
+
+void
+bar (void)
+{ 
+  register int x asm ("19");   /* { dg-error "register specified for 'x' is an 
internal GCC implementation detail" } */
+  int y = x;
+}
+
+void
+baz (void)
+{ 
+  register int x asm ("argp"); /* { dg-error "register specified for 'x' is an 
internal GCC implementation detail" } */
+  int y = x;
+}
--- gcc/testsuite/gcc.target/i386/pr79804.c.jj  2020-01-12 11:54:37.976389828 
+0100
+++ gcc/testsuite/gcc.target/i386/pr79804.c 2020-12-10 10:11:53.948939322 
+0100
@@ -4,7 +4,7 @@
 
 void foo (void)
 {
-  register int r19 asm ("19");
+  register int r19 asm ("19"); /* { dg-error "register specified for 'r19' is 
an internal GCC implementation detail" } */
 
-  asm volatile ("# %0" : "=r"(r19));  /* { dg-error "invalid use of register" 
} */
-}  /* { dg-error "cannot be used in 'asm' here" } */
+  asm volatile ("# %0" : "=r"(r19));
+}
--- gcc/testsuite/gcc.target/i386/pr88178.c.jj  2020-01-12 11:54:37.983389722 
+0100
+++ gcc/testsuite/gcc.target/i386/pr88178.c 2020-12-10 10:12:30.140535022 
+0100
@@ -4,5 +4,5 @@
 
 void foo (void)
 {
-  register int r19 asm ("19");
+  register int r19 asm ("19"); /* { dg-error "register specified for 'r19' is 
an internal GCC implementation detail" } */
 }


Jakub

[PATCH] ira.c: Fix ICE in ira-color [PR97092]

2020-12-10 Thread Andrea Corallo via Gcc-patches

Hi all,

following discussion on PR97092 I'd like to submit the following patch
with a fix plus associated testcase.

With this patch applied, mode is recomputed at each iteration while
looping across the different copies in 'update_costs_from_allocno',
instead of being carried over to subsequent iterations.

bootstrapped and regtested on aarch64-unknown-linux-gnu.

Okay for trunk?

Thanks!

  Andrea
  
>From 302573854794f023d8f4ab47660f367016627340 Mon Sep 17 00:00:00 2001
From: Andrea Corallo 
Date: Wed, 9 Dec 2020 17:59:12 +0100
Subject: [PATCH] ira.c: Fix ICE in ira-color [PR97092]

2020-12-10  Andrea Corallo  

gcc/ChangeLog

2020-12-10  Andrea Corallo  

PR rtl-optimization/97092
* ira-color.c (update_costs_from_allocno): Do not carry over mode
between subsequent iterations.

gcc/testsuite/ChangeLog

2020-12-10  Andrea Corallo  

* gcc.target/aarch64/sve/pr97092.c: New test.
---
 gcc/ira-color.c   |  6 +++--
 .../gcc.target/aarch64/sve/pr97092.c  | 24 +++
 2 files changed, 28 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr97092.c

diff --git a/gcc/ira-color.c b/gcc/ira-color.c
index d3f8e23faff..eb525390494 100644
--- a/gcc/ira-color.c
+++ b/gcc/ira-color.c
@@ -1407,9 +1407,11 @@ update_costs_from_allocno (ira_allocno_t allocno, int 
hard_regno,
 register classes bigger modes might be invalid,
 e.g. DImode for AREG on x86.  For such cases the
 register move cost will be maximal.  */
- mode = narrower_subreg_mode (mode, ALLOCNO_MODE (cp->second));
+ mode = narrower_subreg_mode (ALLOCNO_MODE (cp->first),
+  ALLOCNO_MODE (cp->second));
+
  ira_init_register_move_cost_if_necessary (mode);
- 
+
  cost = (cp->second == allocno
  ? ira_register_move_cost[mode][rclass][aclass]
  : ira_register_move_cost[mode][aclass][rclass]);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr97092.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pr97092.c
new file mode 100644
index 000..69f7a3ee2db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr97092.c
@@ -0,0 +1,24 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O1 -ftree-vectorize -march=armv8.2-a+sve" } */
+
+void g (void);
+long a;
+
+signed char
+bar (int c, int d)
+{
+  return c + d;
+}
+
+void
+foo (void)
+{
+  int f;
+  for (; (long)foo < 4;) {
+f = 0;
+for (; f < 10; f++);
+g ();
+a = -4;
+for (; a; a = bar (a, 1));
+  }
+}
-- 
2.20.1

Ping x2: [PATCH] PowerPC: Map IEEE 128-bit long double built-in functions

2020-12-10 Thread Michael Meissner via Gcc-patches

This patch is one of the critical patches to enable building GCC with the long
double type set to IEEE 128-bit.

I haven't received a response for this patch:

| Date: Thu, 19 Nov 2020 18:58:14 -0500
| Subject: [PATCH] PowerPC: Map IEEE 128-bit long double built-in functions
| Message-ID: <20201119235814.ga...@ibm-toto.the-meissners.org>
| https://gcc.gnu.org/pipermail/gcc-patches/2020-November/559659.html

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797

Ping x2: [PATCH] PowerPC: Add float128/Decimal conversions

2020-12-10 Thread Michael Meissner via Gcc-patches

This is one of the critical patches for enabling IEEE 128-bit long double.

I haven't received a reply for this patch:

| Date: Thu, 19 Nov 2020 19:05:24 -0500
| Subject: [PATCH] PowerPC: Add float128/Decimal conversions
| Message-ID: <2020112524.ga...@ibm-toto.the-meissners.org>
| https://gcc.gnu.org/pipermail/gcc-patches/2020-November/559661.html

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797

Ping x2: [PATCH] PowerPC: Set long double size for IBM/IEEE.

2020-12-10 Thread Michael Meissner via Gcc-patches

This patch isn't critical for IEEE 128-bit long double, but it is a feature
Jonathan Wakely asked for, to have a single switch to enable IEEE/IBM 128-bit
long double, without having to set the long double size.

I haven't received a reply for this patch:

| Date: Thu, 19 Nov 2020 19:00:11 -0500
| Subject: [PATCH] PowerPC: Set long double size for IBM/IEEE.
| Message-ID: <2020112011.ga...@ibm-toto.the-meissners.org>
| https://gcc.gnu.org/pipermail/gcc-patches/2020-November/559660.html

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797

Re: [PATCH] [WIP] openmp: Add OpenMP 5.0 task detach clause support

2020-12-10 Thread Jakub Jelinek via Gcc-patches

On Thu, Dec 10, 2020 at 03:38:40PM +0100, Jakub Jelinek via Gcc-patches wrote:
> I don't understand this.  My reading of:
> "The event-handle will be considered as if it was specified on a
> firstprivate clause. The use of a variable in a detach clause expression of a 
> task
> construct causes an implicit reference to the variable in all enclosing
> constructs."
> is that we should do:
>   case OMP_CLAUSE_DETACH:
>   decl = OMP_CLAUSE_DECL (c);
>   goto do_notice;
> which does the second sentence, and for the first sentence I believe it
> talks about the task construct rather than about the outer construct.
> So (again, something for testsuite):
> void
> foo (void)
> {
>   omp_event_handle_t t;
>   #pragma omp parallel master default (none) /* { dg-error "..." } */
>   {
> #pragma omp task detach (t)
> ;
>   }
> }
> The dg-error should be the usual error about t being referenced in the
> construct but not specified in the data sharing clauses on parallel.
> And then
> void
> bar (void)
> {
>   omp_event_handle_t t;
>   #pragma omp task detach (t) default (none)
>   omp_fullfill_event (t); // This should be ok, above first sentence says
> // that it is as if firstprivate (t)
> }
> 
> But I think it is actually even stronger than that,
>   #pragma omp task detach (t) firstprivate (t)
> and
>   #pragma omp task detach (t) shared (t)
> etc. should be invalid too (at least in pedantic reading).
> I guess we should ask on omp-lang.  If it actually works as firstprivate
> (t), perhaps we should handle it that way already in the FEs.

Asked and Alex said that both should be invalid.  Though, if we implement
detach as passing the address of the variable to GOMP_task, then implicitly
adding a firstprivate clause would copy the value from before it has been
initialized.  One way to handle that would be to not add a firstprivate clause
next to detach, but to treat detach like a firstprivate clause in most places,
and just pass it specially (let the parent of the task pass the address
of the variable and let the receiving side receive the value instead,
which would force task_cpyfn, or handle it more like we handle the bounds
of a taskloop - force the omp_event_handle_t to be the first variable in the
structure and let GOMP_task write the address not just to *detach, but also
to the first element in the structure).

Jakub

[committed][Patch]arm: Fix typo in testcase mve-vsub_1.c

2020-12-10 Thread Dennis Zhang via Gcc-patches

This patch fixes a typo reported at 
https://gcc.gnu.org/pipermail/gcc-patches/2020-November/558478.html

gcc/testsuite/
* gcc.target/arm/simd/mve-vsub_1.c: Fix typo.
Remove needless dg-additional-options.

Cheers,
Dennis
diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vsub_1.c b/gcc/testsuite/gcc.target/arm/simd/mve-vsub_1.c
index cb3ef3a14e0..842e5c6a30b 100644
--- a/gcc/testsuite/gcc.target/arm/simd/mve-vsub_1.c
+++ b/gcc/testsuite/gcc.target/arm/simd/mve-vsub_1.c
@@ -1,7 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
 /* { dg-add-options arm_v8_1m_mve_fp } */
-/* { dg -additional-options "-O3 -funsafe-math-optimizations" } */
 /* { dg-additional-options "-O3" } */
 
 #include

Ping x2: [PATCH 1/2] Power10: Add IEEE 128-bit xsmaxcqp and xsmincqp support.

2020-12-10 Thread Michael Meissner via Gcc-patches

This patch has been around for quite some time.  It isn't critical for enabling
IEEE 128-bit long double, but it improves code generation for float128 on
power10.

I haven't received a reply for this patch:

| Date: Sun, 15 Nov 2020 23:50:51 -0500
| Subject: [PATCH 1/2] Power10: Add IEEE 128-bit xsmaxcqp and xsmincqp support.
| Message-ID: <20201116045051.ga3...@ibm-toto.the-meissners.org>
| https://gcc.gnu.org/pipermail/gcc-patches/2020-November/559166.html

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797

Ping x2: [PATCH 2/2] Power10: Add IEEE 128-bit fp conditional move

2020-12-10 Thread Michael Meissner via Gcc-patches

This needs the first patch in the series to be applied first.  This patch is
not critical for enabling IEEE 128-bit long double, but it does improve
float128 code generation on power10.

I haven't received a reply for this patch:

| Date: Sun, 15 Nov 2020 23:53:20 -0500
| Subject: [PATCH 2/2] Power10: Add IEEE 128-bit fp conditional move
| Message-ID: <20201116045320.gb3...@ibm-toto.the-meissners.org>
| https://gcc.gnu.org/pipermail/gcc-patches/2020-November/559167.html

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797

Re: [PATCH][Arm] Auto-vectorization for MVE: vsub

2020-12-10 Thread Dennis Zhang via Gcc-patches

Hi Christophe,

> From: Christophe Lyon 
> Sent: Monday, November 9, 2020 1:38 PM
> To: Dennis Zhang
> Cc: Kyrylo Tkachov; gcc-patches@gcc.gnu.org; Richard Earnshaw; nd; Ramana 
> Radhakrishnan
> Subject: Re: [PATCH][Arm] Auto-vectorization for MVE: vsub
>
> Hi,
>
> I have just noticed that the new test has:
> /* { dg -additional-options "-O3 -funsafe-math-optimizations" } */
> /* { dg-additional-options "-O3" } */
> That is, the first line has a typo (space between dg and -additional-options),
> so the test is effectively compiled with -O3, and without
> -funsafe-math-optimizations
>
> Since I can see it passing, it looks like -funsafe-math-optimizations
> is not needed, can you clarify?
>
> Thanks

Thank you for the report. The '-funsafe-math-optimizations' option is not 
needed.
The typo is fixed by commit b46dd03fe94e2428cbcdbfc4d081d89ed604803a.

Bests
Dennis

Re: [PATCH] Add -Wtsan.

2020-12-10 Thread Martin Sebor via Gcc-patches


On 12/9/20 2:24 AM, Martin Liška wrote:

Hello.

The newly added warning is about warning a user
that std::atomic_thread_fence is not supported by TSAN.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/ChangeLog:

 PR sanitizer/97868
 * common.opt: Add new warning -Wtsan.
 * doc/invoke.texi: Likewise.
 * tsan.c (instrument_builtin_call): Warn users about unsupported
 std::atomic_thread_fence.
---
  gcc/common.opt  | 4 
  gcc/doc/invoke.texi | 8 +++-
  gcc/tsan.c  | 6 ++
  3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/gcc/common.opt b/gcc/common.opt
index 6645539f5e5..6c24c7bbffb 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -842,6 +842,10 @@ Wvector-operation-performance
  Common Var(warn_vector_operation_performance) Warning
  Warn when a vector operation is compiled outside the SIMD.

+Wtsan
+Common Var(warn_tsan) Init(1) Warning
+Warn about unsupported features in the ThreadSanitizer.

  ^^^

Just a minor grammatical nit.  As a name, ThreadSanitizer should
not be preceded by an article.  Same in invoke.texi below.


+
  Xassembler
  Driver Separate

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index f7e8c8b29b0..5bd18c78e99 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -377,7 +377,7 @@ Objective-C and Objective-C++ Dialects}.
  -Wswitch  -Wno-switch-bool  -Wswitch-default  -Wswitch-enum @gol
  -Wno-switch-outside-range  -Wno-switch-unreachable  -Wsync-nand @gol
  -Wsystem-headers  -Wtautological-compare  -Wtrampolines  -Wtrigraphs @gol
--Wtype-limits  -Wundef @gol
+-Wtsan -Wtype-limits  -Wundef @gol
  -Wuninitialized  -Wunknown-pragmas @gol
  -Wunsuffixed-float-constants  -Wunused @gol
  -Wunused-but-set-parameter  -Wunused-but-set-variable @gol
@@ -7951,6 +7951,12 @@ Note that the code above is invalid in C++11.

  This warning is enabled by default.

+@item -Wtsan
+@opindex Wtsan
+@opindex Wno-tsan
+Warn about unsupported features in the ThreadSanitizer.
+This warning is enabled by default.
+
  @item -Wtype-limits
  @opindex Wtype-limits
  @opindex Wno-type-limits
diff --git a/gcc/tsan.c b/gcc/tsan.c
index 4d6223454b5..be9fabea62a 100644
--- a/gcc/tsan.c
+++ b/gcc/tsan.c
@@ -45,6 +45,7 @@ along with GCC; see the file COPYING3.  If not see
  #include "asan.h"
  #include "builtins.h"
  #include "target.h"
+#include "diagnostic-core.h"

  /* Number of instrumented memory accesses in the current function.  */

@@ -500,6 +501,11 @@ instrument_builtin_call (gimple_stmt_iterator *gsi)
    continue;
  else
    {
+    if (fcode == BUILT_IN_ATOMIC_THREAD_FENCE)
+  warning_at (gimple_location (stmt), OPT_Wtsan,
+  "%qs is not supported by ThreadSanitizer and may "
+  "lead to false positives", "atomic_thread_fence");


Most similar warnings mention the sanitizer option rather than
referring to the tool by name.  E.g.,

  "transactional memory is not supported with %<-fsanitize=address%>"
or

  "%<-fsanitize=leak%> is incompatible with %<-fsanitize=thread%>"

For the sake of consistency (and also to provide a bit of additional
detail) I would suggest to follow that style.

Martin


+
  tree decl = builtin_decl_implicit (tsan_atomic_table[i].tsan_fcode);
  if (decl == NULL_TREE)
    return;

Re: [PATCH] libstdc++: Add C++ runtime support for new 128-bit long double format

2020-12-10 Thread Jonathan Wakely via Gcc-patches


On 04/12/20 00:35 +, Jonathan Wakely wrote:

On 03/12/20 20:07 -0300, Tulio Magno Quites Machado Filho via Libstdc++ wrote:

Jonathan Wakely via Libstdc++  writes:


diff --git a/libstdc++-v3/configure.ac b/libstdc++-v3/configure.ac
index cbfdf4c6bad..d25842fef35 100644
--- a/libstdc++-v3/configure.ac
+++ b/libstdc++-v3/configure.ac
@@ -421,12 +425,43 @@ case "$target" in

port_specific_symbol_files="\$(top_srcdir)/config/os/gnu-linux/ldbl-extra.ver"
case "$target" in
  powerpc*-*-linux*)
-   LONG_DOUBLE_COMPAT_FLAGS="$LONG_DOUBLE_COMPAT_FLAGS -mno-gnu-attribute" 
;;
+   LONG_DOUBLE_COMPAT_FLAGS="$LONG_DOUBLE_COMPAT_FLAGS -mno-gnu-attribute"
+# Check for IEEE128 support in libm:
+AC_CHECK_LIB(m, frexpf128,


I suggest to replace frexpf128 with __frexpieee128.

The former is available on a glibc that supports _Float128 (since glibc 2.26).
The latter is available on a glibc that supports binary128 long double (since
glibc 2.32)


Hmm, yes, you pointed me to __frexpieee128 a few months ago, but for
some reason I either didn't switch to using it, or lost a patch when
squashing and rebasing branches. Hopefully I just forgot to change it,
but I'll double check to make sure I haven't left any work on an old
branch. Thanks for suggesting it (again!)


As expected, it still works with a check for __frexpieee128 instead.

So are you happy for me to push this to master with that change?

(It won't be until Tuesday now, as I have some time off).

Re: RFC: ARM MVE and Neon auto-vectorization

2020-12-10 Thread Christophe Lyon via Gcc-patches

On Wed, 9 Dec 2020 at 17:47, Richard Sandiford
 wrote:
>
> Christophe Lyon via Gcc-patches  writes:
> > Hi,
> >
> > I've been working for a while on enabling auto-vectorization for ARM
> > MVE, and I find it a bit awkward to keep things common with Neon as
> > much as possible.
> >
> > I've just sent a few patches for logical operators
> > (vand/vorr/veor/vbic), and I have a few more WIP patches where I
> > struggle to avoid duplication.
> >
> > For example, vneg is supported in different modes by MVE and Neon:
> > * Neon: VDQ and VH iterators: V8QI V16QI V4HI V8HI V2SI V4SI V4HF V8HF
> > V2SF V4SF V2DI  and V8HF V4HF
> > * MVE: MVE_2 and MVE_0 iterators: V16QI V8HI V4SI and V8HF V4SF
>
> My hope behind the ARM_HAVE__ macros was that the common
> (optab) define_expand could use those, with the most permissive iterator
> necessary.  We could stick on a "&& !TARGET_IWMMXT" for things that
> aren't implemented for iwMMXt.
>
> The above combination seems like a natural fit for unmodified
> VDQ with ARM_HAVE_<MODE>_ARITH.  This would be similar to the
> existing add<mode>3 pattern.
>

OK, so it looks like I should revert/fix my just-committed vand patch,
and restore the unconditional definition of VDQ and use
ARM_HAVE_<MODE>_ARITH for the expander?

> > My 'vand' patch changes the definition of VDQ so that the relevant
> > modes are enabled only when !TARGET_HAVE_MVE (V8QI, ...), and this
> > helps writing a simpler expander.
> >
> > However, vneg is used by vshr (right-shifts by register are
> > implemented as left-shift by negation of that register), so the
> > expander uses something like:
> >
> >   emit_insn (gen_neg2 (neg, operands[2]));
> >   if (TARGET_NEON)
> >   emit_insn (gen_ashl3_signed (operands[0], operands[1], neg));
> >   else
> >   emit_insn (gen_mve_vshlq_s (operands[0], operands[1], neg));
> >
> > which does not work if the iterator has conditional members: the
> > 'else' part is still generated for  unsupported by MVE.
>
> FWIW, I agree with Andre that it would be good to remove unnecessary
> NEON/MVE differences like this.
>
OK thanks for the feedback, I'll update my other patches along these
lines. Too bad this will delay auto-vectorization improvement more
than I hoped :-(

> Another technique that can be used where necessary is to convert:
>
>   gen_foo (args)
>
> to:
>
>   gen_foo (mode, args)
>
> and add a @ to the start of the definition of pattern "foo".

Ha, indeed, thanks!

> Thanks,
> Richard

[PATCH] c++: Diagnose unstable satisfaction results

2020-12-10 Thread Patrick Palka via Gcc-patches

This implements lightweight heuristic detection and diagnosing of
satisfaction results that change at different points in the program,
which renders the program as ill-formed NDR as of P2014.  We've recently
started to more aggressively cache satisfaction results, and so the goal
here is to make this caching behavior more transparent to users.

A satisfaction result is flagged as "potentially unstable" (at the atom
granularity) if during its computation, some type completion failure
occurs.  This is detected by making complete_type_or_maybe_complain
increment a counter upon failure and comparing the value of the counter
before and after satisfaction.  (We don't instrument complete_type
directly because it's used "opportunistically" in many spots where type
completion failure doesn't necessarily lead to substitution failure.)

Flagged satisfaction results are always recomputed from scratch, even
when performing satisfaction quietly.  We then compare the recomputed
result with the cached result, and if they differ, proceed with
diagnosing the instability.  (We may also unflag a result if it turned
out to be independent of the previously detected type completion
failure.)  When performing satisfaction noisily, we always check
instability.

Most of the implementation is confined to the satisfaction_cache class,
which has been completely rewritten.

Bootstrapped and regtested on x86_64-pc-linux-gnu, and also tested on
cmcstl2 and range-v3.  The static_assert failures in the view.join test
from cmcstl2 are now elaborated on after this patch, and additionally
the alg.equal_range test now fails for the same reason as the view.join
test.

gcc/cp/ChangeLog:

* constraint.cc (failed_type_completion_count): New.
(note_failed_type_completion_for_satisfaction): New.
(sat_entry::constr): Rename to ...
(sat_entry::atom): ... this.
(sat_entry::location): New member.
(sat_entry::maybe_unstable): New member.
(sat_entry::diagnose_instability): New member.
(struct sat_hasher): Adjust after the above renaming.
(get_satisfaction, save_satisfaction): Remove.
(satisfaction_cache): Rewrite completely.
(satisfy_atom): When instantiation of the parameter mapping
fails, set diagnose_instability.  Propagate location from
inst_cache.entry to cache.entry if the secondary lookup
succeeded.
(satisfy_declaration_constraints): When
failed_type_completion_count differs before and after
satisfaction, then don't cache the satisfaction result.
* cp-tree.h (note_failed_type_completion_for_satisfaction):
Declare.
* pt.c (tsubst) : Use
complete_type_or_maybe_complain instead of open-coding it.
* typeck.c (complete_type_or_maybe_complain): Call
note_failed_type_completion_for_satisfaction when type
completion fails.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/concepts-complete1.C: New test.
* g++.dg/cpp2a/concepts-complete2.C: New test.
* g++.dg/cpp2a/concepts-complete3.C: New test.
---
 gcc/cp/constraint.cc  | 283 ++
 gcc/cp/cp-tree.h  |   2 +
 gcc/cp/pt.c   |   9 +-
 gcc/cp/typeck.c   |   1 +
 .../g++.dg/cpp2a/concepts-complete1.C |  18 ++
 .../g++.dg/cpp2a/concepts-complete2.C |  23 ++
 .../g++.dg/cpp2a/concepts-complete3.C |  16 +
 7 files changed, 282 insertions(+), 70 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-complete1.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-complete2.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-complete3.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 73c038e3afe..ee702b34d01 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -2374,35 +2374,82 @@ tsubst_parameter_mapping (tree map, tree args, 
tsubst_flags_t complain, tree in_
 Constraint satisfaction
 ---*/
 
-/* Hash functions for satisfaction entries.  */
+/* A counter incremented by note_failed_type_completion_for_satisfaction().
+   It's used by the satisfaction caches in order to flag "potentially unstable"
+   satisfaction results.  */
+
+static unsigned failed_type_completion_count;
+
+/* Called whenever a type completion failure occurs that definitely affects
+   the semantics of the program, by e.g. inducing substitution failure.  */
+
+void
+note_failed_type_completion_for_satisfaction (tree type)
+{
+  gcc_checking_assert (!COMPLETE_TYPE_P (type));
+  if (CLASS_TYPE_P (type)
+  && CLASSTYPE_TEMPLATE_INSTANTIATION (type))
+/* After instantiation, an incomplete class template specialization
+   will always be incomplete, so we don't increment the counter in this
+   case.  */;
+  else
+++failed_type_comple

Re: [PATCH v5] Practical Improvement to libgcc Complex Divide

2020-12-10 Thread Patrick McGehearty via Gcc-patches


Thank you for your rapid feedback.
I'll fix the various formatting issues (spaces in the wrong places
and such as well as revise the Changelog magic) in the next submission.
It will wait for Joseph's review to also make any changes he suggests.
I'll also try to train myself to be more sensitive to gcc formatting
conventions while proofreading.

I'm reluctant to change or use XALLOCAVEC instead of alloca as that
is not the current style elsewhere in the routine.

I agree that namelen=strlen(name) would be an apparent optimization,
but since *name is declared const char, I would think the compiler
would need to compute strlen(name) only one time. Again, I'm reluctant
to change the existing code patterns.

On the strcpy, strncpy, and memcpy question, given the short length of
the string being copied, I don't think it makes much difference.
The two other copy operations in the file are memcpy.
memcpy might be slightly better since it is generally more frequently
seen and more likely that gcc has special case code to inline
short fixed length memcpy as a few assignments. Even if both strncpy
and memcpy are inlined, the memcpy code may be simpler as it does
not need to be concerned with special treatment of nulls.
I'll change the strncpy to memcpy.

- patrick



On 12/8/2020 5:31 PM, Jakub Jelinek wrote:

On Tue, Dec 08, 2020 at 10:32:33PM +, Patrick McGehearty via Gcc-patches 
wrote:

2020-12-08 Patrick McGehearty 

* gcc/c-family/c-cppbuiltin.c - Add supporting macros for new complex divide.
* libgcc2.c (__divsc3, __divdc3, __divxc3, __divtc3): Improve complex divide.
* libgcc/config/rs6000/_divkc3.c - Complex divide changes for rs6000.
* gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkd.c - double cdiv test.
* gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkf.c - float cdiv test.
* gcc/testsuite/gcc.c-torture/execute/ieee/cdivchkld.c - long double cdiv test.

Thanks for working on this, I'll defer review to Joseph, just want to add a few
random comments.
The above ChangeLog will not get through the commit checking scripts,
one needs two spaces before and after name instead of just one,
pathnames should be relative to the corresponding ChangeLog file and one
should separate what goes to each ChangeLog, and lines except the first one
should be tab indented.  So it should look like:

2020-12-08  Patrick McGehearty  

gcc/c-family/
* c-cppbuiltin.c (c_cpp_builtins): Add supporting macros for new
complex divide.
libgcc/
* libgcc2.c (XMTYPE, XCTYPE, RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
Define.
(__divsc3, __divdc3, __divxc3, __divtc3): Improve complex divide.
* config/rs6000/_divkc3.c (RBIG, RMIN, RMIN2, RMINSCAL, RMAX2):
Define.
(__divkc3): Improve complex divide.
gcc/testsuite/
* gcc.c-torture/execute/ieee/cdivchkd.c: New test.
* gcc.c-torture/execute/ieee/cdivchkf.c: New test.
* gcc.c-torture/execute/ieee/cdivchkld.c: New test.

or so.


--- a/gcc/c-family/c-cppbuiltin.c
+++ b/gcc/c-family/c-cppbuiltin.c
@@ -1347,6 +1347,47 @@ c_cpp_builtins (cpp_reader *pfile)
  "PRECISION__"));
  sprintf (macro_name, "__LIBGCC_%s_EXCESS_PRECISION__", name);
  builtin_define_with_int_value (macro_name, excess_precision);
+
+ if ((mode == TYPE_MODE (float_type_node))
+ || (mode == TYPE_MODE (double_type_node))
+ || (mode == TYPE_MODE (long_double_type_node)))
+   {
+ char val_name[64];
+ char fname[8] = "";
+ if (mode == TYPE_MODE (float_type_node))
+   strncpy(fname, "FLT",4);

Formatting, there should be space before ( for calls, and space in between
, and 4.  Also, what is the point of using strncpy?  strcpy or
memcpy would do.


+ else if (mode == TYPE_MODE (double_type_node))
+   strncpy(fname, "DBL",4);
+ else if (mode == TYPE_MODE (long_double_type_node))
+   strncpy(fname, "LDBL",5);
+
+ if ( (mode == TYPE_MODE (float_type_node))
+  || (mode == TYPE_MODE (double_type_node)) )

Formatting, no spaces in between the ( ( and ) ).

+   {
+ macro_name = (char *) alloca (strlen (name)
+   + sizeof ("__LIBGCC_EPSILON__"
+ ));

This should use XALLOCAVEC macro, so
  macro_name
= XALLOCAVEC (char, strlen (name)
+ sizeof ("__LIBGCC_EPSILON__"));

I admit it is a preexisting problem in the code above it too.


+ sprintf (macro_name, "__LIBGCC_%s_EPSILON__", name);
+ sprintf( val_name, "__%s_EPSILON__", fname);

Space before ( rather than after it.


+ builtin_define_with_value (macro_name, val_name, 0);
+   }
+
+ macro_name = (char *) alloca (strlen (

c++: modularize spelling suggestions

2020-12-10 Thread Nathan Sidwell



This augments the spelling suggestion code to understand about visible
imported modules.  Simply consider each visible binding in the
binding_vector, until we find one that has something of interest.

gcc/cp/
* name-lookup.c: Include bitmap.h.
(enum binding_slots): New.
(maybe_add_fuzzy_binding): Return bool true if found.
(consider_binding_level): Add module support.
* module.cc (get_import_bitmap): Stub.

pushing to trunk
--
Nathan Sidwell
diff --git i/gcc/cp/module.cc w/gcc/cp/module.cc
index 176286cdd91..dc4fa41bbc4 100644
--- i/gcc/cp/module.cc
+++ w/gcc/cp/module.cc
@@ -80,6 +80,12 @@ module_name (unsigned, bool)
   return nullptr;
 }
 
+bitmap
+get_import_bitmap ()
+{
+  return nullptr;
+}
+
 void
 mangle_module (int, bool)
 {
diff --git i/gcc/cp/name-lookup.c w/gcc/cp/name-lookup.c
index 051ef0b36b1..c921baa6301 100644
--- i/gcc/cp/name-lookup.c
+++ w/gcc/cp/name-lookup.c
@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "c-family/name-hint.h"
 #include "c-family/known-headers.h"
 #include "c-family/c-spellcheck.h"
+#include "bitmap.h"
 
 static cxx_binding *cxx_binding_make (tree value, tree type);
 static cp_binding_level *innermost_nonclass_level (void);
@@ -46,6 +47,18 @@ static name_hint maybe_suggest_missing_std_header (location_t location,
 static name_hint suggest_alternatives_for_1 (location_t location, tree name,
 	 bool suggest_misspellings);
 
+/* Slots in BINDING_VECTOR.  */
+enum binding_slots
+{
+ BINDING_SLOT_CURRENT,	/* Slot for current TU.  */
+ BINDING_SLOT_GLOBAL,	/* Slot for merged global module. */
+ BINDING_SLOT_PARTITION, /* Slot for merged partition entities
+			(optional).  */
+
+ /* Number of always-allocated slots.  */
+ BINDING_SLOTS_FIXED = BINDING_SLOT_GLOBAL + 1
+};
+
 /* Create an overload suitable for recording an artificial TYPE_DECL
and another decl.  We use this machanism to implement the struct
stat hack within a namespace.  It'd be nice to use it everywhere.  */
@@ -6137,9 +6150,10 @@ maybe_add_fuzzy_decl (auto_vec &vec, tree decl)
 }
 
 /* Examing the namespace binding BINDING, and add at most one instance
-   of the name, if it contains a visible entity of interest.  */
+   of the name, if it contains a visible entity of interest.  Return
+   true if we added something.  */
 
-void
+bool
 maybe_add_fuzzy_binding (auto_vec &vec, tree binding,
 			  lookup_name_fuzzy_kind kind)
 {
@@ -6151,7 +6165,7 @@ maybe_add_fuzzy_binding (auto_vec &vec, tree binding,
 	  && STAT_TYPE (binding))
 	{
 	  if (maybe_add_fuzzy_decl (vec, STAT_TYPE (binding)))
-	return;
+	return true;
 	}
   else if (!STAT_DECL_HIDDEN_P (binding))
 	value = STAT_DECL (binding);
@@ -6166,8 +6180,11 @@ maybe_add_fuzzy_binding (auto_vec &vec, tree binding,
   if (kind != FUZZY_LOOKUP_TYPENAME
 	  || TREE_CODE (STRIP_TEMPLATE (value)) == TYPE_DECL)
 	if (maybe_add_fuzzy_decl (vec, value))
-	  return;
+	  return true;
 }
+
+  /* Nothing found.  */
+  return false;
 }
 
 /* Helper function for lookup_name_fuzzy.
@@ -6233,8 +6250,54 @@ consider_binding_level (tree name, best_match  &bm,
 	(DECL_NAMESPACE_BINDINGS (ns)->end ());
   for (hash_table::iterator iter
 	 (DECL_NAMESPACE_BINDINGS (ns)->begin ()); iter != end; ++iter)
-	maybe_add_fuzzy_binding (vec, *iter, kind);
+	{
+	  tree binding = *iter;
 
+	  if (TREE_CODE (binding) == BINDING_VECTOR)
+	{
+	  bitmap imports = get_import_bitmap ();
+	  binding_cluster *cluster = BINDING_VECTOR_CLUSTER_BASE (binding);
+
+	  if (tree bind = cluster->slots[BINDING_SLOT_CURRENT])
+		if (maybe_add_fuzzy_binding (vec, bind, kind))
+		  continue;
+
+	  /* Scan the imported bindings.  */
+	  unsigned ix = BINDING_VECTOR_NUM_CLUSTERS (binding);
+	  if (BINDING_VECTOR_SLOTS_PER_CLUSTER == BINDING_SLOTS_FIXED)
+		{
+		  ix--;
+		  cluster++;
+		}
+
+	  for (; ix--; cluster++)
+		for (unsigned jx = 0; jx != BINDING_VECTOR_SLOTS_PER_CLUSTER;
+		 jx++)
+		  {
+		/* Are we importing this module?  */
+		if (unsigned base = cluster->indices[jx].base)
+		  if (unsigned span = cluster->indices[jx].span)
+			do
+			  if (bitmap_bit_p (imports, base))
+			goto found;
+			while (++base, --span);
+		continue;
+
+		  found:;
+		/* Is it loaded?  */
+		if (cluster->slots[jx].is_lazy ())
+		  /* Let's not read in everything on the first
+			 spello! **/
+		  continue;
+		if (tree bind = cluster->slots[jx])
+		  if (maybe_add_fuzzy_binding (vec, bind, kind))
+			break;
+		  }
+	}
+	  else
+	maybe_add_fuzzy_binding (vec, binding, kind);
+	}
+	
   vec.qsort ([] (const void *a_, const void *b_)
 		 {
 		   return strcmp (IDENTIFIER_POINTER (*(const tree *)a_),

Re: [PATCH 1/8 v4] Dead-field warning in structs at LTO-time

2020-12-10 Thread Martin Sebor via Gcc-patches


On 12/9/20 4:09 PM, Eric Gallager via Gcc-patches wrote:

On Fri, Dec 4, 2020 at 4:58 AM Erick Ochoa <
erick.oc...@theobroma-systems.com> wrote:



This commit includes the following components:

Type-based escape analysis to determine structs that can be modified at
link-time.
Field access analysis to determine which fields are never read.

The type-based escape analysis provides a list of types, that are not
visible outside of the current linking unit (e.g. parameter types of
external
functions).

The field access analyses non-escaping structs for fields that
are not used in the linking unit and thus can be removed.

2020-11-04  Erick Ochoa  

  * Makefile.in: Add file to list of new sources.
  * common.opt: Add new flags.
  * ipa-type-escape-analysis.c: New file.
---
   gcc/Makefile.in|1 +
   gcc/common.opt |8 +
   gcc/ipa-type-escape-analysis.c | 3428 
   gcc/ipa-type-escape-analysis.h | 1152 +++
   gcc/passes.def |1 +
   gcc/timevar.def|1 +
   gcc/tree-pass.h|2 +
   7 files changed, 4593 insertions(+)
   create mode 100644 gcc/ipa-type-escape-analysis.c
   create mode 100644 gcc/ipa-type-escape-analysis.h

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 978a08f7b04..8b18c9217a2 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1415,6 +1415,7 @@ OBJS = \
 incpath.o \
 init-regs.o \
 internal-fn.o \
+   ipa-type-escape-analysis.o \
 ipa-cp.o \
 ipa-sra.o \
 ipa-devirt.o \
diff --git a/gcc/common.opt b/gcc/common.opt
index d4cbb2f86a5..85351738a29 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3460,4 +3460,12 @@ fipa-ra
   Common Report Var(flag_ipa_ra) Optimization
   Use caller save register across calls if possible.
   +fipa-type-escape-analysis
+Common Report Var(flag_ipa_type_escape_analysis) Optimization
+This flag is only used for debugging the type escape analysis
+
+Wdfa
+Common Var(warn_dfa) Init(1) Warning
+Warn about dead fields at link time.
+



I don't really like the name "-Wdfa" very much; could you maybe come up
with a longer and more descriptive name instead? Say, "-Wunused-field" or
"-Wunused-private-field" depending on the kind of field:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72789
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92801


I second that.  Also, invoke.texi needs to document the new option
and if it's only active with LTO it should mention that.  I looked
to see how the warning is used so I have some more comments on that
code (I didn't review the rest of the patch).



+// Obtain nonescaping unaccessed fields
+static record_field_offset_map_t
+obtain_nonescaping_unaccessed_fields (tpartitions_t casting,
+ record_field_map_t record_field_map)
+{
+  bool has_fields_that_can_be_deleted = false;
+  record_field_offset_map_t record_field_offset_map;
+  for (std::map::iterator i
+   = record_field_map.begin (),
+   e = record_field_map.end ();
+   i != e; ++i)
+{
+  tree r_i = i->first;
+  std::vector equivalence
+   = find_equivalent_trees (r_i, record_field_map, casting);
+  field_offsets_t field_offset;
+  field_access_map_t original_field_map = record_field_map[r_i];
+  keep_only_read_fields_from_field_map (original_field_map,
field_offset);
+  keep_only_read_fields_from_equivalent_field_maps (equivalence,
+   record_field_map,
+   field_offset);
+  // These map holds the following:
+  // RECORD_TYPE -> unsigned (bit_pos_offset which has been read)
+  record_field_offset_map[r_i] = field_offset;
+}
+
+  // So now that we only have the FIELDS which are read,
+  // we need to compute the complement...
+
+  // Improve: This is tightly coupled, I need to decouple it...
+  std::set to_erase;
+  std::set to_keep;
+  mark_escaping_types_to_be_deleted (record_field_offset_map, to_erase,
+casting);
+  for (std::map::iterator i
+   = record_field_offset_map.begin (),
+   e = record_field_offset_map.end ();
+   i != e; ++i)
+{
+  tree record = i->first;
+  const bool will_be_erased = to_erase.find (record) !=
to_erase.end ();
+  // No need to compute which fields can be deleted if type is
escaping
+  if (will_be_erased)
+   continue;
+
+  field_offsets_t field_offset = i->second;
+  for (tree field = TYPE_FIELDS (record); field; field = DECL_CHAIN
(field))
+   {
+ unsigned f_offset = bitpos_of_field (field);
+ bool in_set2 = field_offset.find (f_offset) != field_offset.end
();
+ if (in_set2)
+   {
+ field_offset.erase (f_offset);
+ continue;
+   }
+ to_keep.insert (record);
+ field_offset.inser

[PATCH]AArch64: Add NEON, SVE and SVE2 RTL patterns for Complex Addition, Multiply and FMA.

2020-12-10 Thread Tamar Christina via Gcc-patches

Hi All,

This adds implementation for the optabs for complex operations.  With this the
following C code:

  void f90 (float complex a[restrict N], float complex b[restrict N],
float complex c[restrict N])
  {
for (int i=0; i < N; i++)
  c[i] = a[i] + (b[i] * I);
  }

generates

  f90:
  mov x3, 0
  .p2align 3,,7
  .L2:
  ldr q0, [x0, x3]
  ldr q1, [x1, x3]
  fcadd   v0.4s, v0.4s, v1.4s, #90
  str q0, [x2, x3]
  add x3, x3, 16
  cmp x3, 1600
  bne .L2
  ret

instead of

  f90:
  add x3, x1, 1600
  .p2align 3,,7
  .L2:
  ld2 {v4.4s - v5.4s}, [x0], 32
  ld2 {v2.4s - v3.4s}, [x1], 32
  fsubv0.4s, v4.4s, v3.4s
  faddv1.4s, v5.4s, v2.4s
  st2 {v0.4s - v1.4s}, [x2], 32
  cmp x3, x1
  bne .L2
  ret

It defined a new iterator VALL_ARITH which contains types for which we can do
general arithmetic (excludes bfloat16).

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
Checked with armv8-a+sve2+fp16 and no issues.  Note that due to a mid-end
limitation SLP for SVE currently fails for some permutes.  The tests have these
marked as XFAIL.  I do intend to fix this soon. 

Matching tests for these are in the mid-end patches.
Note that the mid-end patches are still being respun and I may need to
change the order of some parameters but no other change is expected and
would like to decrease the size of future patches.  As such..

Ok for master?

Thanks,
Tamar


gcc/ChangeLog:

* config/aarch64/aarch64-simd.md (cadd3,
cml4, cmul3): New.
* config/aarch64/iterators.md (VALL_ARITH, UNSPEC_FCMUL,
UNSPEC_FCMUL180, UNSPEC_FCMLS, UNSPEC_FCMLS180, UNSPEC_CMLS,
UNSPEC_CMLS180, UNSPEC_CMUL, UNSPEC_CMUL180, FCMLA_OP, FCMUL_OP, rot_op,
rotsplit1, rotsplit2, fcmac1, sve_rot1, sve_rot2, SVE2_INT_CMLA_OP,
SVE2_INT_CMUL_OP, SVE2_INT_CADD_OP): New.
(rot): Add UNSPEC_FCMLS, UNSPEC_FCMUL, UNSPEC_FCMUL180.
* config/aarch64/aarch64-sve.md (cadd3,
cml4, cmul3): New.
* config/aarch64/aarch64-sve2.md (cadd3,
cml4, cmul3): New.

-- 
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 68baf416045178b0ebcfeb8de2d201f625f1c317..1aa74b154e054f2a01f8843dfed218fe850b 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -449,6 +449,14 @@ (define_insn "aarch64_fcadd"
   [(set_attr "type" "neon_fcadd")]
 )
 
+(define_expand "cadd3"
+  [(set (match_operand:VHSDF 0 "register_operand")
+	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
+		   (match_operand:VHSDF 2 "register_operand")]
+		   FCADD))]
+  "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
+)
+
 (define_insn "aarch64_fcmla"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 	(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
@@ -508,6 +516,47 @@ (define_insn "aarch64_fcmlaq_lane"
   [(set_attr "type" "neon_fcmla")]
 )
 
+;; The complex mla/mls operations always need to expand to two instructions.
+;; The first operation does half the computation and the second does the
+;; remainder.  Because of this, expand early.
+(define_expand "cml4"
+  [(set (match_operand:VHSDF 0 "register_operand")
+	(plus:VHSDF (match_operand:VHSDF 1 "register_operand")
+		(unspec:VHSDF [(match_operand:VHSDF 2 "register_operand")
+   (match_operand:VHSDF 3 "register_operand")]
+   FCMLA_OP)))]
+  "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
+{
+  rtx tmp = gen_reg_rtx (mode);
+  emit_insn (gen_aarch64_fcmla (tmp, operands[1],
+		 operands[2], operands[3]));
+  emit_insn (gen_aarch64_fcmla (operands[0], tmp,
+		 operands[2], operands[3]));
+  DONE;
+})
+
+;; The complex mul operations always need to expand to two instructions.
+;; The first operation does half the computation and the second does the
+;; remainder.  Because of this, expand early.
+(define_expand "cmul3"
+  [(set (match_operand:VHSDF 0 "register_operand")
+	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
+		   (match_operand:VHSDF 2 "register_operand")]
+		   FCMUL_OP))]
+  "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
+{
+  rtx tmp = gen_reg_rtx (mode);
+  rtx res1 = gen_reg_rtx (mode);
+  emit_move_insn (tmp, CONST0_RTX (mode));
+  emit_insn (gen_aarch64_fcmla (res1, tmp,
+		 operands[1], operands[2]));
+  emit_insn (gen_aarch64_fcmla (operands[0], res1,
+		 operands[1], operands[2]));
+  DONE;
+})
+
+
+
 ;; These instructions map to the __builtins for the Dot Product operations.
 (define_insn "aarch64_dot"
   [(set (match_operand:VS 0 "register_operand" "=w")
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 6359c40bdecda6c126bd70bef66561dd1da44dc9..7d27a84016d687cb6c019f98b99a7aacf8b3a031 100644
--- a/gcc/config/aarch64/aarch64-sve

[PATCH]Arm: Add NEON and MVE RTL patterns for Complex Addition, Multiply and FMA. Fix mve types Fix mve patterns

2020-12-10 Thread Tamar Christina via Gcc-patches

Hi All,

This adds implementation for the optabs for complex additions.  With this the
following C code:

  void f90 (float complex a[restrict N], float complex b[restrict N],
float complex c[restrict N])
  {
for (int i=0; i < N; i++)
  c[i] = a[i] + (b[i] * I);
  }

generates

  f90:
  add r3, r2, #1600
  .L2:
  vld1.32 {q8}, [r0]!
  vld1.32 {q9}, [r1]!
  vcadd.f32   q8, q8, q9, #90
  vst1.32 {q8}, [r2]!
  cmp r3, r2
  bne .L2
  bx  lr


instead of

  f90:
  add r3, r2, #1600
  .L2:
  vld2.32 {d24-d27}, [r0]!
  vld2.32 {d20-d23}, [r1]!
  vsub.f32  q8, q12, q11
  vadd.f32  q9, q13, q10
  vst2.32 {d16-d19}, [r2]!
  cmp r3, r2
  bne .L2
  bx  lr

Bootstrapped Regtested on arm-none-linux-gnueabihf and no issues.
Codegen tested for -march=armv8.1-m.main+mve.fp -mfloat-abi=hard -mfpu=auto
and no issues.

Matching tests for these are in the mid-end patches.
Note that the mid-end patches are still being respun and I may need to
change the order of some parameters but no other change is expected and
would like to decrease the size of future patches.  As such..

Ok for master?

Thanks,
Tamar

Tamar

gcc/ChangeLog:

* config/arm/arm_mve.h (__arm_vcaddq_rot90_u8, __arm_vcaddq_rot270_u8,
__arm_vcaddq_rot90_s8, __arm_vcaddq_rot270_s8,
__arm_vcaddq_rot90_u16, __arm_vcaddq_rot270_u16, __arm_vcaddq_rot90_s16,
__arm_vcaddq_rot270_s16, __arm_vcaddq_rot90_u32,
__arm_vcaddq_rot270_u32, __arm_vcaddq_rot90_s32,
__arm_vcaddq_rot270_s32, __arm_vcmulq_rot90_f16,
__arm_vcmulq_rot270_f16, __arm_vcmulq_rot180_f16,
__arm_vcmulq_f16, __arm_vcaddq_rot90_f16, __arm_vcaddq_rot270_f16,
__arm_vcmulq_rot90_f32, __arm_vcmulq_rot270_f32,
__arm_vcmulq_rot180_f32, __arm_vcmulq_f32, __arm_vcaddq_rot90_f32,
__arm_vcaddq_rot270_f32, __arm_vcmlaq_f16, __arm_vcmlaq_rot180_f16,
__arm_vcmlaq_rot270_f16, __arm_vcmlaq_rot90_f16, __arm_vcmlaq_f32,
__arm_vcmlaq_rot180_f32, __arm_vcmlaq_rot270_f32,
__arm_vcmlaq_rot90_f32): Update builtin calls.
* config/arm/arm_mve_builtins.def (vcaddq_rot90_u, vcaddq_rot270_u,
vcaddq_rot90_s, vcaddq_rot270_s, vcaddq_rot90_f, vcaddq_rot270_f,
vcmulq_f, vcmulq_rot90_f, vcmulq_rot180_f, vcmulq_rot270_f,
vcmlaq_f, vcmlaq_rot90_f, vcmlaq_rot180_f, vcmlaq_rot270_f): Removed.
(vcaddq_rot90, vcaddq_rot270, vcmulq, vcmulq_rot90, vcmulq_rot180,
vcmulq_rot270, vcmlaq, vcmlaq_rot90, vcmlaq_rot180, vcmlaq_rot270):
New.
* config/arm/constraints.md (Dz): Include MVE.
* config/arm/iterators.md (mve_rotsplit1, mve_rotsplit2): New.
(rot): Add UNSPEC_VCMLS, UNSPEC_VCMUL and UNSPEC_VCMUL180.
(rot_op, rotsplit1, rotsplit2, fcmac1, VCMLA_OP, VCMUL_OP): New.
* config/arm/mve.md (VCADDQ_ROT270_S, VCADDQ_ROT90_S, VCADDQ_ROT270_U,
VCADDQ_ROT90_U, VCADDQ_ROT270_F, VCADDQ_ROT90_F, VCMULQ_F,
VCMULQ_ROT180_F, VCMULQ_ROT270_F, VCMULQ_ROT90_F, VCMLAQ_F,
VCMLAQ_ROT180_F, VCMLAQ_ROT90_F, VCMLAQ_ROT270_F, VCADDQ_ROT270_S,
VCADDQ_ROT270, VCADDQ_ROT90): Removed.
(mve_rot, VCMUL): New.
(mve_vcaddq_rot270_,
mve_vcaddq_rot270_f, mve_vcaddq_rot90_f, mve_vcmulq_f, mve_vcmulq_rot270_f,
mve_vcmulq_rot90_f, mve_vcmlaq_f, mve_vcmlaq_rot180_f,
mve_vcmlaq_rot270_f, mve_vcmlaq_rot90_f): Removed.
(mve_vcmlaq, mve_vcmulq,
mve_vcaddq, cadd3, mve_vcaddq):
New.
(cmul3): Exclude MVE types.
* config/arm/unspecs.md (UNSPEC_VCMUL90, UNSPEC_VCMUL270): New.
* config/arm/vec-common.md (cadd3, cmul3,
arm_vcmla, cml4): New.
* config/arm/unspecs.md (UNSPEC_VCMUL, UNSPEC_VCMUL180, UNSPEC_VCMLS,
UNSPEC_VCMLS180): New.

-- 
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index 6c0d1e2e634a32196eb31079166a7733dcd3a4b6..45014621f2533497e90ddf5257fb04e1fd9325b4 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -3981,14 +3981,16 @@ __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot90_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return __builtin_mve_vcaddq_rot90_uv16qi (__a, __b);
+  return (uint8x16_t)
+__builtin_mve_vcaddq_rot90v16qi ((int8x16_t)__a, (int8x16_t)__b);
 }
 
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 __arm_vcaddq_rot270_u8 (uint8x16_t __a, uint8x16_t __b)
 {
-  return __builtin_mve_vcaddq_rot270_uv16qi (__a, __b);
+  return (uint8x16_t)
+__builtin_mve_vcaddq_rot270v16qi ((int8x16_t)__a, (int8x16_t)__b);
 }
 
 __extension__ extern __inline uint8x16_t
@@ -4520,14 +4522,14 @@ __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__,

Re: [PATCH] libstdc++: Add C++ runtime support for new 128-bit long double format

2020-12-10 Thread Peter Bergner via Gcc-patches

On 12/10/20 10:14 AM, Jonathan Wakely wrote:
> On 04/12/20 00:35 +, Jonathan Wakely wrote:
>> On 03/12/20 20:07 -0300, Tulio Magno Quites Machado Filho via Libstdc++ 
>> wrote:
>>> I suggest to replace frexpf128 with __frexpieee128.
>>>
> >>> The former is available on a glibc that supports _Float128 (since glibc 
> >>> 2.26).
> >>> The latter is available on a glibc that supports binary128 long double (since
> >>> glibc 2.32)
>>
>> Hmm, yes, you pointed me to __frexpieee128 a few months ago, but for
>> some reason I either didn't switch to using it, or lost a patch when
>> squashing and rebasing branches. Hopefully I just forgot to change it,
>> but I'll double check to make sure I haven't left any work on an old
>> branch. Thanks for suggesting it (again!)
> 
> As expected, it still works with a check for __frexpieee128 instead.
> 
> So are you happy for me to push this to master with that change?
> 
> (It won't be until Tuesday now, as I have some time off).

FYI, Tulio is on vacation through December 22nd and I'm not sure how
closely he is watching his email, if at all.  Given Tulio mentioned
the change in the first place, I think he's ok with the change.
He can beat me later if I'm wrong! :-)

Peter

RE: [PATCH]Arm: Add NEON and MVE RTL patterns for Complex Addition, Multiply and FMA. Fix mve types Fix mve patterns

2020-12-10 Thread Kyrylo Tkachov via Gcc-patches



> -Original Message-
> From: Tamar Christina 
> Sent: 10 December 2020 17:00
> To: gcc-patches@gcc.gnu.org
> Cc: nd ; Ramana Radhakrishnan
> ; Richard Earnshaw
> ; ni...@redhat.com; Kyrylo Tkachov
> 
> Subject: [PATCH]Arm: Add NEON and MVE RTL patterns for Complex Addition,
> Multiply and FMA. Fix mve types Fix mve patterns
> 
> Hi All,
> 
> This adds implementation for the optabs for complex additions.  With this
> the
> following C code:
> 
>   void f90 (float complex a[restrict N], float complex b[restrict N],
>   float complex c[restrict N])
>   {
> for (int i=0; i < N; i++)
>   c[i] = a[i] + (b[i] * I);
>   }
> 
> generates
> 
>   f90:
> add r3, r2, #1600
>   .L2:
> vld1.32 {q8}, [r0]!
> vld1.32 {q9}, [r1]!
> vcadd.f32   q8, q8, q9, #90
> vst1.32 {q8}, [r2]!
> cmp r3, r2
> bne .L2
> bx  lr
> 
> 
> instead of
> 
>   f90:
> add r3, r2, #1600
>   .L2:
> vld2.32 {d24-d27}, [r0]!
> vld2.32 {d20-d23}, [r1]!
> vsub.f32  q8, q12, q11
> vadd.f32  q9, q13, q10
> vst2.32 {d16-d19}, [r2]!
> cmp r3, r2
> bne .L2
> bx  lr
> 
> Bootstrapped Regtested on arm-none-linux-gnueabihf and no issues.
> Codegen tested for -march=armv8.1-m.main+mve.fp -mfloat-abi=hard -
> mfpu=auto
> and no issues.
> 
> Matching tests for these are in the mid-end patches.
> Note that the mid-end patches are still being respun and I may need to
> change the order of some parameters but no other change is expected and
> would like to decrease the size of future patches.  As such..
> 
> Ok for master?

Ok.
Thanks,
Kyrill

> 
> Thanks,
> Tamar
> 
> Tamar
> 
> gcc/ChangeLog:
> 
>   * config/arm/arm_mve.h (__arm_vcaddq_rot90_u8,
> __arm_vcaddq_rot270_u8,
>   , __arm_vcaddq_rot90_s8, __arm_vcaddq_rot270_s8,
>   __arm_vcaddq_rot90_u16, __arm_vcaddq_rot270_u16,
> __arm_vcaddq_rot90_s16,
>   __arm_vcaddq_rot270_s16, __arm_vcaddq_rot90_u32,
>   __arm_vcaddq_rot270_u32, __arm_vcaddq_rot90_s32,
>   __arm_vcaddq_rot270_s32, __arm_vcmulq_rot90_f16,
>   __arm_vcmulq_rot270_f16, __arm_vcmulq_rot180_f16,
>   __arm_vcmulq_f16, __arm_vcaddq_rot90_f16,
> __arm_vcaddq_rot270_f16,
>   __arm_vcmulq_rot90_f32, __arm_vcmulq_rot270_f32,
>   __arm_vcmulq_rot180_f32, __arm_vcmulq_f32,
> __arm_vcaddq_rot90_f32,
>   __arm_vcaddq_rot270_f32, __arm_vcmlaq_f16,
> __arm_vcmlaq_rot180_f16,
>   __arm_vcmlaq_rot270_f16, __arm_vcmlaq_rot90_f16,
> __arm_vcmlaq_f32,
>   __arm_vcmlaq_rot180_f32, __arm_vcmlaq_rot270_f32,
>   __arm_vcmlaq_rot90_f32): Update builtin calls.
>   * config/arm/arm_mve_builtins.def (vcaddq_rot90_u,
> vcaddq_rot270_u,
>   vcaddq_rot90_s, vcaddq_rot270_s, vcaddq_rot90_f,
> vcaddq_rot270_f,
>   vcmulq_f, vcmulq_rot90_f, vcmulq_rot180_f, vcmulq_rot270_f,
>   vcmlaq_f, vcmlaq_rot90_f, vcmlaq_rot180_f, vcmlaq_rot270_f):
> Removed.
>   (vcaddq_rot90, vcaddq_rot270, vcmulq, vcmulq_rot90,
> vcmulq_rot180,
>   vcmulq_rot270, vcmlaq, vcmlaq_rot90, vcmlaq_rot180,
> vcmlaq_rot270):
>   New.
>   * config/arm/constraints.md (Dz): Include MVE.
>   * config/arm/iterators.md (mve_rotsplit1, mve_rotsplit2): New.
>   (rot): Add UNSPEC_VCMLS, UNSPEC_VCMUL and
> UNSPEC_VCMUL180.
>   (rot_op, rotsplit1, rotsplit2, fcmac1, VCMLA_OP, VCMUL_OP): New.
>   * config/arm/mve.md (VCADDQ_ROT270_S, VCADDQ_ROT90_S,
> VCADDQ_ROT270_U,
>   VCADDQ_ROT90_U, VCADDQ_ROT270_F, VCADDQ_ROT90_F,
> VCMULQ_F,
>   VCMULQ_ROT180_F, VCMULQ_ROT270_F, VCMULQ_ROT90_F,
> VCMLAQ_F,
>   VCMLAQ_ROT180_F, VCMLAQ_ROT90_F, VCMLAQ_ROT270_F,
> VCADDQ_ROT270_S,
>   VCADDQ_ROT270, VCADDQ_ROT90): Removed.
>   (mve_rot, VCMUL): New.
>   (mve_vcaddq_rot270_ mve_vcaddq_rot90_,
>   mve_vcaddq_rot270_f, mve_vcaddq_rot90_f,
> mve_vcmulq_f   mve_vcmulq_rot180_f, mve_vcmulq_rot270_f,
>   mve_vcmulq_rot90_f, mve_vcmlaq_f,
> mve_vcmlaq_rot180_f,
>   mve_vcmlaq_rot270_f, mve_vcmlaq_rot90_f):
> Removed.
>   (mve_vcmlaq, mve_vcmulq,
>   mve_vcaddq, cadd3,
> mve_vcaddq):
>   New.
>   (cmul3): Exclude MVE types.
>   * config/arm/unspecs.md (UNSPEC_VCMUL90, UNSPEC_VCMUL270):
> New.
>   * config/arm/vec-common.md (cadd3,
> cmul3,
>   arm_vcmla, cml4): New.
>   * config/arm/unspecs.md (UNSPEC_VCMUL, UNSPEC_VCMUL180,
> UNSPEC_VCMLS,
>   UNSPEC_VCMLS180): New.
> 
> --

Re: [PATCH] libstdc++: Add C++ runtime support for new 128-bit long double format

2020-12-10 Thread Jonathan Wakely via Gcc-patches


On 10/12/20 11:14 -0600, Peter Bergner via Libstdc++ wrote:

On 12/10/20 10:14 AM, Jonathan Wakely wrote:

On 04/12/20 00:35 +, Jonathan Wakely wrote:

On 03/12/20 20:07 -0300, Tulio Magno Quites Machado Filho via Libstdc++ wrote:

I suggest to replace frexpf128 with __frexpieee128.

The former is available on a glibc that supports _Float128 (since glibc 2.26).
The latter is available on a glibc that supports binary128 long double (since
glibc 2.32)


Hmm, yes, you pointed me to __frexpieee128 a few months ago, but for
some reason I either didn't switch to using it, or lost a patch when
squashing and rebasing branches. Hopefully I just forgot to change it,
but I'll double check to make sure I haven't left any work on an old
branch. Thanks for suggesting it (again!)


As expected, it still works with a check for __frexpieee128 instead.

So are you happy for me to push this to master with that change?

(It won't be until Tuesday now, as I have some time off).


FYI, Tulio is on vacation through December 22nd and I'm not sure how
closely he is watching his email, if at all.  Given Tulio mentioned
the change in the first place, I think he's ok with the change.
He can beat me later if I'm wrong! :-)


The question was more about pushing the entire patch for ieee128
support, which isn't on master yet.

Do we want this done now for gcc 11? It adds a ton of new symbols to
libstdc++.so which would be a pain to remove again if we decide it's
not ready.

Currently the new features will be enabled by default if the support
is detected in the compiler and glibc. Do we want to gate it behind a
new --enable-something-something option? (I'm assuming not).

Re: [PATCH 1/8 v4] Dead-field warning in structs at LTO-time

2020-12-10 Thread David Malcolm via Gcc-patches

On Fri, 2020-12-04 at 10:58 +0100, Erick Ochoa wrote:
> + // Anonymous fields? (Which the record can be!).
> +   warning (OPT_Wdfa, "RECORD_TYPE %qE has dead field %qE in LTO.\n",
> +   record, field);

Others have pointed out that -Wdfa isn't a good name for the warning, I
like their suggestions.

A few other nitpicks on this:

- "RECORD_TYPE" is an implementation detail of GCC.  Diagnostics should
be worded in terms of the user’s source code, and the source language,
rather than GCC’s own implementation details.

- "dead field" feels like jargon to me.

How about:
  field 'foo' in 'struct bar' is never used [-Wunused-field]
or somesuch?

- The "in LTO" in the message seems like a redundant implementation
detail to me.

- "warning" will implicitly use the global "input_location" as the
location of the diagnostic.  Better would be to use "warning_at" with
the location of the field's declaration.  I think you can get this via
DECL_SOURCE_LOCATION () on the FIELD_DECL.


See also:
  https://gcc.gnu.org/onlinedocs/gccint/Guidelines-for-Diagnostics.html


Hope this is constructive
Dave

Re: [PATCH] c++: Diagnose unstable satisfaction results

2020-12-10 Thread David Malcolm via Gcc-patches

On Thu, 2020-12-10 at 11:21 -0500, Patrick Palka via Gcc-patches wrote:
> + if (entry->diagnose_instability)
> +   {
> + error_at (EXPR_LOCATION (ATOMIC_CONSTR_EXPR (entry->atom)),
> +   "satisfaction value of atomic constraint %qE changed "
> +   "from %qE to %qE", entry->atom, entry->result, 
> result);
> + inform (entry->location,
> + "satisfaction value first evaluated to %qE from here",
> + entry->result);
> +   }

I only glanced at the rest of the patch, but a minor nit I spotted
here: please add an
auto_diagnostic_group d;
to this block to associate the "note" with the "error".


Dave

Re: [PATCH 1/8 v4] Dead-field warning in structs at LTO-time

2020-12-10 Thread Erick Ochoa





On 10/12/2020 18:39, David Malcolm wrote:

On Fri, 2020-12-04 at 10:58 +0100, Erick Ochoa wrote:

+ // Anonymous fields? (Which the record can be!).
+   warning (OPT_Wdfa, "RECORD_TYPE %qE has dead field %qE in LTO.\n",
+   record, field);


Others have pointed out that -Wdfa isn't a good name for the warning, I
like their suggestions.

A few other nitpicks on this:

- "RECORD_TYPE" is an implementation detail of GCC.  Diagnostics should
be worded in terms of the user’s source code, and the source language,
rather than GCC’s own implementation details.

- "dead field" feels like jargon to me.

How about:
   field 'foo' in 'struct bar' is never used [-Wunused-field]
or somesuch?

- The "in LTO" in the message seems like a redundant implementation
detail to me.

- "warning" will implicitly use the global "input_location" as the
location of the diagnostic.  Better would be to use "warning_at" with
the location of the field's declaration.  I think you can get this via
DECL_SOURCE_LOCATION () on the FIELD_DECL.


See also:
   https://gcc.gnu.org/onlinedocs/gccint/Guidelines-for-Diagnostics.html



Thank you everyone for your input. I will change this in the next 
version of the patchset. I am already fixing some errors I found during 
the weekend.


I originally avoided printing the field name because I believe that 
some fields may be anonymous and I was unsure how to print them.


I also avoided using warning_at because originally I didn't know which 
location would be appropriate for the warning. (The declaration 
of the structure? What if there are writes to the field but never reads? 
Currently these are marked as dead as well.)


"In LTO" is added in the warning because at the source level, if one 
were to remove the field from the declaration and compile, there might 
be compile time errors because dead code elimination has not yet been 
run and there might be references to these fields. (They're only truly 
dead after dead code elimination.)


At the moment, my next step will be to review the guidelines for 
diagnostics and think about how to improve the current diagnostic. If 
you have more suggestions, please do let me know!


-Erick




Hope this is constructive
Dave

Re: RFC: ARM MVE and Neon auto-vectorization

2020-12-10 Thread Richard Sandiford via Gcc-patches

Christophe Lyon  writes:
> On Wed, 9 Dec 2020 at 17:47, Richard Sandiford
>  wrote:
>>
>> Christophe Lyon via Gcc-patches  writes:
>> > Hi,
>> >
>> > I've been working for a while on enabling auto-vectorization for ARM
>> > MVE, and I find it a bit awkward to keep things common with Neon as
>> > much as possible.
>> >
>> > I've just sent a few patches for logical operators
>> > (vand/vorr/veor/vbic), and I have a few more WIP patches where I
>> > struggle to avoid duplication.
>> >
>> > For example, vneg is supported in different modes by MVE and Neon:
>> > * Neon: VDQ and VH iterators: V8QI V16QI V4HI V8HI V2SI V4SI V4HF V8HF
>> > V2SF V4SF V2DI  and V8HF V4HF
>> > * MVE: MVE_2 and MVE_0 iterators: V16QI V8HI V4SI and V8HF V4SF
>>
>> My hope behind the ARM_HAVE_<MODE>_<OP> macros was that the common
>> (optab) define_expand could use those, with the most permissive iterator
>> necessary.  We could stick on a "&& !TARGET_IWMMXT" for things that
>> aren't implemented for iwMMXt.
>>
>> The above combination seems like a natural fit for unmodified
>> VDQ with ARM_HAVE_<MODE>_ARITH.  This would be similar to the
>> existing add<mode>3 pattern.
>>
>
> OK, so it looks like I should revert/fix my just-committed vand patch,
> and restore the unconditional definition of VDQ and use
> ARM_HAVE_<MODE>_ARITH for the expander?

That's one for the maintainers, the above is just my opinion.

I don't think a revert is necessary though.  It looks like it
would be a small delta on top of the committed version.

>> > My 'vand' patch changes the definition of VDQ so that the relevant
>> > modes are enabled only when !TARGET_HAVE_MVE (V8QI, ...), and this
>> > helps writing a simpler expander.
>> >
>> > However, vneg is used by vshr (right-shifts by register are
>> > implemented as left-shift by negation of that register), so the
>> > expander uses something like:
>> >
>> >   emit_insn (gen_neg<mode>2 (neg, operands[2]));
>> >   if (TARGET_NEON)
>> >   emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
>> >   else
>> >   emit_insn (gen_mve_vshlq_s<mode> (operands[0], operands[1], neg));
>> >
>> > which does not work if the iterator has conditional members: the
>> > 'else' part is still generated for  unsupported by MVE.
>>
>> FWIW, I agree with Andre that it would be good to remove unnecessary
>> NEON/MVE differences like this.
>>
> OK thanks for the feedback, I'll update my other patches along these
> lines. Too bad this will delay auto-vectorization improvement more
> than I hoped :-(

Just to be sure, after seeing:

;; We use the same code as in neon.md (TODO: avoid this duplication).

in the vand patch:

I didn't mean above that we should consolidate MVE and NEON define_insns
that happen to be the same.  I agree that should be at most a TODO (like
the one above).

It was more that it would be good to avoid having:

  if (TARGET_NEON)
emit_insn (gen_neon_thing (…));
  else
emit_insn (gen_mve_thing (…));

in cases where neon_thing and mve_thing have the same pattern.
Instead we can just have a single define_expand that generates
the common pattern for both architectures (if we don't already).
The common expand would work in a similar way to named optab patterns,
except that it's internal to the arm port.

FWIW, one hacky way of honouring the MVE intrinsic naming scheme
while using different names for the underlying .md patterns would
be to have:

  const insn_code CODE_FOR_mve_ = CODE_FOR_;

in arm-builtins.c, perhaps wrapped in macros to autogenerate the
mode differences.  Not elegant, for sure, but maybe it would be
useful for some things.

Thanks,
Richard

c++: Name lookup for modules

2020-12-10 Thread Nathan Sidwell


This augments the name lookup with knowledge about the BINDING_VECTOR.
That holds per-module namespace bindings, and we need to collect the
bindings in visible imports when we do lookup.  We also need to do
some checking when we're pushing a new decl to check we're not
overriding an existing visible binding in some way.

To deal with the Global Module and Module Partitions, we reserve 1 or
2 slots in the BINDING_VECTOR to record those entities that may
legitimately appear in more than one module.

As mentioned before, the BINDING_VECTOR is created lazily, when
imported bindings appear.  The current TU's decls then appear on slot
zero.

gcc/cp/
* cp-tree.h (visible_instantiation_path): Renamed.
* module.cc (get_originating_module_decl, lazy_load_binding)
(lazy_load_members, visible_instantiation_path): Stubs.
* name-lookup.c (STAT_TYPE_VISIBLE_P, STAT_VISIBLE): New.
(search_imported_binding_slot, init_global_partition)
(get_fixed_binding_slot): New.
(name_lookup::process_module_binding): New.
(name_lookup::search_namespace_only): Search BINDING_VECTOR.
(name_lookup::adl_namespace_fns): Likewise.
(name_lookup::search_adl): Search visible instantiation path.
(maybe_lazily_declare): Maybe lazy load members.
(implicitly_export_namespace): New.
(maybe_record_mergeable_decl): New.
(check_module_override): New.
(do_pushdecl): Deal with BINDING_VECTOR, check override.
(add_mergeable_namespace_entity): New.
(get_namespace_binding): Deal with BINDING_VECTOR.
(do_namespace_alias): Call set_originating_module.
(lookup_elaborated_type_1): Deal with BINDING_VECTOR.
(do_pushtag): Call set_originating_module.
(reuse_namespace): New.
(make_namespace_finish): Add FROM_IMPORT parm.
(push_namespace): Deal with BINDING_VECTOR & namespace reuse.
(maybe_save_operator_binding): Save when module CMI in play.
* name-lookup.h (add_mergeable_namespace_entity): Declare.

pushing to trunk

--
Nathan Sidwell
diff --git i/gcc/cp/cp-tree.h w/gcc/cp/cp-tree.h
index 6270fadfe2b..5304f6b86a2 100644
--- i/gcc/cp/cp-tree.h
+++ w/gcc/cp/cp-tree.h
@@ -6993,7 +6993,7 @@ extern void maybe_check_all_macros (cpp_reader *);
 extern void finish_module_processing (cpp_reader *);
 extern char const *module_name (unsigned, bool header_ok);
 extern bitmap get_import_bitmap ();
-extern bitmap module_visible_instantiation_path (bitmap *);
+extern bitmap visible_instantiation_path (bitmap *);
 extern void module_begin_main_file (cpp_reader *, line_maps *,
 const line_map_ordinary *);
 extern void module_preprocess_options (cpp_reader *);
diff --git i/gcc/cp/module.cc w/gcc/cp/module.cc
index dc4fa41bbc4..c98df14c45e 100644
--- i/gcc/cp/module.cc
+++ w/gcc/cp/module.cc
@@ -135,11 +135,33 @@ set_defining_module (tree)
 {
 }
 
+tree
+get_originating_module_decl (tree decl)
+{
+  return decl;
+}
+
 void
 set_originating_module (tree, bool)
 {
 }
 
+void
+lazy_load_binding (unsigned, tree, tree, binding_slot *)
+{
+}
+
+void
+lazy_load_members (tree)
+{
+}
+
+bitmap
+visible_instantiation_path (bitmap *)
+{
+  return nullptr;
+}
+
 void
 import_module (module_state *, location_t, bool, tree, cpp_reader *)
 {
diff --git i/gcc/cp/name-lookup.c w/gcc/cp/name-lookup.c
index c921baa6301..9c945842fa1 100644
--- i/gcc/cp/name-lookup.c
+++ w/gcc/cp/name-lookup.c
@@ -61,14 +61,19 @@ enum binding_slots
 
 /* Create an overload suitable for recording an artificial TYPE_DECL
and another decl.  We use this machanism to implement the struct
-   stat hack within a namespace.  It'd be nice to use it everywhere.  */
+   stat hack.  */
 
 #define STAT_HACK_P(N) ((N) && TREE_CODE (N) == OVERLOAD && OVL_LOOKUP_P (N))
+#define STAT_TYPE_VISIBLE_P(N) TREE_USED (OVERLOAD_CHECK (N))
 #define STAT_TYPE(N) TREE_TYPE (N)
 #define STAT_DECL(N) OVL_FUNCTION (N)
+#define STAT_VISIBLE(N) OVL_CHAIN (N)
 #define MAYBE_STAT_DECL(N) (STAT_HACK_P (N) ? STAT_DECL (N) : N)
 #define MAYBE_STAT_TYPE(N) (STAT_HACK_P (N) ? STAT_TYPE (N) : NULL_TREE)
 
+/* When a STAT_HACK_P is true, OVL_USING_P and OVL_EXPORT_P are valid
+   and apply to the hacked type.  */
+
 /* For regular (maybe) overloaded functions, we have OVL_HIDDEN_P.
But we also need to indicate hiddenness on implicit type decls
(injected friend classes), and (coming soon) decls injected from
@@ -129,7 +134,197 @@ find_namespace_value (tree ns, tree name)
   return b ? MAYBE_STAT_DECL (*b) : NULL_TREE;
 }
 
-/* Add DECL to the list of things declared in B.  */
+/* Look in *SLOT for a the binding of NAME in imported module IX.
+   Returns pointer to binding's slot, or NULL if not found.  Does a
+   binary search, as this is mainly used for random access during
+   importing.  Do not use for the fixed slots.  */
+
+static binding_slot *
+search_imported_binding_slot (tree *slot, unsigned ix)
+{
+  gcc_assert (ix);
+
+  if (!

Re: [AArch64] Add --with-tune configure flag

2020-12-10 Thread Wilco Dijkstra via Gcc-patches

Hi Richard,

> I specifically want to test generic SVE rather than SVE tuned for a
> specific core, so --with-arch=armv8.2-a+sve is the thing I want to test.

Btw that's not actually what you get if you use cc1 - you always get armv8.0,
so --with-arch doesn't work at all. The only case that appears to work in cc1
is --with-cpu as long as it is not --with-cpu=native.

Cheers,
Wilco

Re: [PATCH] Correct -fdump-go-spec's handling of incomplete types

2020-12-10 Thread Rainer Orth

Hi Ian,

> On Tue, Dec 8, 2020 at 2:57 PM Nikhil Benesch  
> wrote:
>>
>> This patch corrects -fdump-go-spec's handling of incomplete types.
>> To my knowledge the issue fixed here has not been previously
>> reported. It was exposed by an in-progress port of gccgo to FreeBSD.
>>
>> Given the following C code
>>
>> struct s_fwd v_fwd;
>> struct s_fwd { };
>>
>> -fdump-go-spec currently produces the following Go code
>>
>> var v_fwd struct {};
>> type s_fwd s_fwd;
>>
>> whereas the correct Go code is:
>>
>> var v_fwd s_fwd;
>> type s_fwd struct {};
>>
>> (Go is considerably more permissive than C with out-of-order
>> declarations, so anywhere an out-of-order declaration is valid in
>> C it is valid in Go.)
>>
>> gcc/:
>> * godump.c (go_format_type): Don't consider whether a type has
>> been seen when determining whether to output a type by name.
>> Consider only the use_type_name parameter.
>> (go_output_typedef): When outputting a typedef, format the
>> declaration's original type, which contains the name of the
>> underlying type rather than the name of the typedef.
>> gcc/testsuite:
>> * gcc.misc-tests/godump-1.c: Add test case.
>
> Thanks.  I changed function types to use type names, and committed like so.

This patch badly broke Solaris bootstrap:

runtime_sysinfo.go:623:6: error: invalid recursive type
  623 | type ___FILE ___FILE
  |  ^
runtime_sysinfo.go:7045:6: error: redefinition of ‘_mld_hdr_t’
 7045 | type _mld_hdr_t _mld_hdr
  |  ^
runtime_sysinfo.go:1510:6: note: previous definition of ‘_mld_hdr_t’ was here
 1510 | type _mld_hdr_t _mld_hdr
  |  ^
runtime_sysinfo.go:7070:6: error: redefinition of ‘_upad128_t’
 7070 | type _upad128_t struct { _l [4]uint32; }
  |  ^
runtime_sysinfo.go:7029:6: note: previous definition of ‘_upad128_t’ was here
 7029 | type _upad128_t struct {}
  |  ^
runtime_sysinfo.go:7071:6: error: redefinition of ‘_zone_net_addr_t’
 7071 | type _zone_net_addr_t _zone_net_addr
  |  ^
runtime_sysinfo.go:1079:6: note: previous definition of ‘_zone_net_addr_t’ was 
here
 1079 | type _zone_net_addr_t _zone_net_addr
  |  ^
runtime_sysinfo.go:7072:6: error: redefinition of ‘_flow_arp_desc_t’
 7072 | type _flow_arp_desc_t _flow_arp_desc_s
  |  ^
runtime_sysinfo.go:1127:6: note: previous definition of ‘_flow_arp_desc_t’ was 
here
 1127 | type _flow_arp_desc_t _flow_arp_desc_s
  |  ^
runtime_sysinfo.go:7073:6: error: redefinition of ‘_flow_l3_desc_t’
 7073 | type _flow_l3_desc_t _flow_l3_desc_s
  |  ^
runtime_sysinfo.go:1130:6: note: previous definition of ‘_flow_l3_desc_t’ was 
here
 1130 | type _flow_l3_desc_t _flow_l3_desc_s
  |  ^
runtime_sysinfo.go:7074:6: error: redefinition of ‘_mac_ipaddr_t’
 7074 | type _mac_ipaddr_t _mac_ipaddr_s
  |  ^
runtime_sysinfo.go:1150:6: note: previous definition of ‘_mac_ipaddr_t’ was here
 1150 | type _mac_ipaddr_t _mac_ipaddr_s
  |  ^
runtime_sysinfo.go:7075:6: error: redefinition of ‘_mactun_info_t’
 7075 | type _mactun_info_t _mactun_info_s
  |  ^
runtime_sysinfo.go:1213:6: note: previous definition of ‘_mactun_info_t’ was 
here
 1213 | type _mactun_info_t _mactun_info_s
  |  ^
runtime_sysinfo.go:187:19: error: use of undefined type ‘_timespec’
  187 | type _timestruc_t _timespec
  |   ^
runtime_sysinfo.go:1213:21: error: use of undefined type ‘_mactun_info_s’
 1213 | type _mactun_info_t _mactun_info_s
  | ^
runtime_sysinfo.go:741:13: error: use of undefined type ‘_ip6_hdr’
  741 | type _ip6_t _ip6_hdr
  | ^
runtime_sysinfo.go:1079:23: error: use of undefined type ‘_zone_net_addr’
 1079 | type _zone_net_addr_t _zone_net_addr
  |   ^
runtime_sysinfo.go:1127:23: error: use of undefined type ‘_flow_arp_desc_s’
 1127 | type _flow_arp_desc_t _flow_arp_desc_s
  |   ^
runtime_sysinfo.go:1130:22: error: use of undefined type ‘_flow_l3_desc_s’
 1130 | type _flow_l3_desc_t _flow_l3_desc_s
  |  ^
runtime_sysinfo.go:1150:20: error: use of undefined type ‘_mac_ipaddr_s’
 1150 | type _mac_ipaddr_t _mac_ipaddr_s
  |^
runtime_sysinfo.go:1210:28: error: use of undefined type ‘_mac_resource_props_s’
 1210 | type _mac_resource_props_t _mac_resource_props_s
  |^
runtime_sysinfo.go:1510:17: error: use of undefined type ‘_mld_hdr’
 1510 | type _mld_hdr_t _mld_hdr
  | ^
runtime_sysinfo.go:1519:17: error: use of undefined type ‘_mld2mar’
 1519 | type _mld2mar_t _mld2mar
  | ^
runtime_sysinfo.go:1535:29: error: use of undefined type ‘_nd_neighbor_solicit’
 1535 | type _nd_neighbor_solicit_t _nd_neighbor_solicit
  | ^
runtime_sysinfo.go:1538:28: error: use of undefined type ‘_nd_neighbor_advert’
 15

Re: [PATCH] Correct -fdump-go-spec's handling of incomplete types

2020-12-10 Thread Nikhil Benesch via Gcc-patches

Sorry about this, Rainer. I think I see the issue, though it's hard to
be certain without access to a Solaris machine. Assuming the icmp6.h
header hasn't changed since the last time Solaris code was open source
[0], I think the issue is likely to be typedefs that define a named
struct and an alias for that struct in one shot. I'll start pulling on
that thread.

[0]: 
https://github.com/kofemann/opensolaris/blob/80192cd83/usr/src/uts/common/netinet/icmp6.h#L71-L74

On Thu, Dec 10, 2020 at 2:18 PM Rainer Orth  
wrote:
>
> Hi Ian,
>
> > On Tue, Dec 8, 2020 at 2:57 PM Nikhil Benesch  
> > wrote:
> >>
> >> This patch corrects -fdump-go-spec's handling of incomplete types.
> >> To my knowledge the issue fixed here has not been previously
> >> reported. It was exposed by an in-progress port of gccgo to FreeBSD.
> >>
> >> Given the following C code
> >>
> >> struct s_fwd v_fwd;
> >> struct s_fwd { };
> >>
> >> -fdump-go-spec currently produces the following Go code
> >>
> >> var v_fwd struct {};
> >> type s_fwd s_fwd;
> >>
> >> whereas the correct Go code is:
> >>
> >> var v_fwd s_fwd;
> >> type s_fwd struct {};
> >>
> >> (Go is considerably more permissive than C with out-of-order
> >> declarations, so anywhere an out-of-order declaration is valid in
> >> C it is valid in Go.)
> >>
> >> gcc/:
> >> * godump.c (go_format_type): Don't consider whether a type has
> >> been seen when determining whether to output a type by name.
> >> Consider only the use_type_name parameter.
> >> (go_output_typedef): When outputting a typedef, format the
> >> declaration's original type, which contains the name of the
> >> underlying type rather than the name of the typedef.
> >> gcc/testsuite:
> >> * gcc.misc-tests/godump-1.c: Add test case.
> >
> > Thanks.  I changed function types to use type names, and committed like so.
>
> This patch badly broke Solaris bootstrap:
>
> runtime_sysinfo.go:623:6: error: invalid recursive type
>   623 | type ___FILE ___FILE
>   |  ^
> runtime_sysinfo.go:7045:6: error: redefinition of ‘_mld_hdr_t’
>  7045 | type _mld_hdr_t _mld_hdr
>   |  ^
> runtime_sysinfo.go:1510:6: note: previous definition of ‘_mld_hdr_t’ was here
>  1510 | type _mld_hdr_t _mld_hdr
>   |  ^
> runtime_sysinfo.go:7070:6: error: redefinition of ‘_upad128_t’
>  7070 | type _upad128_t struct { _l [4]uint32; }
>   |  ^
> runtime_sysinfo.go:7029:6: note: previous definition of ‘_upad128_t’ was here
>  7029 | type _upad128_t struct {}
>   |  ^
> runtime_sysinfo.go:7071:6: error: redefinition of ‘_zone_net_addr_t’
>  7071 | type _zone_net_addr_t _zone_net_addr
>   |  ^
> runtime_sysinfo.go:1079:6: note: previous definition of ‘_zone_net_addr_t’ 
> was here
>  1079 | type _zone_net_addr_t _zone_net_addr
>   |  ^
> runtime_sysinfo.go:7072:6: error: redefinition of ‘_flow_arp_desc_t’
>  7072 | type _flow_arp_desc_t _flow_arp_desc_s
>   |  ^
> runtime_sysinfo.go:1127:6: note: previous definition of ‘_flow_arp_desc_t’ 
> was here
>  1127 | type _flow_arp_desc_t _flow_arp_desc_s
>   |  ^
> runtime_sysinfo.go:7073:6: error: redefinition of ‘_flow_l3_desc_t’
>  7073 | type _flow_l3_desc_t _flow_l3_desc_s
>   |  ^
> runtime_sysinfo.go:1130:6: note: previous definition of ‘_flow_l3_desc_t’ was 
> here
>  1130 | type _flow_l3_desc_t _flow_l3_desc_s
>   |  ^
> runtime_sysinfo.go:7074:6: error: redefinition of ‘_mac_ipaddr_t’
>  7074 | type _mac_ipaddr_t _mac_ipaddr_s
>   |  ^
> runtime_sysinfo.go:1150:6: note: previous definition of ‘_mac_ipaddr_t’ was 
> here
>  1150 | type _mac_ipaddr_t _mac_ipaddr_s
>   |  ^
> runtime_sysinfo.go:7075:6: error: redefinition of ‘_mactun_info_t’
>  7075 | type _mactun_info_t _mactun_info_s
>   |  ^
> runtime_sysinfo.go:1213:6: note: previous definition of ‘_mactun_info_t’ was 
> here
>  1213 | type _mactun_info_t _mactun_info_s
>   |  ^
> runtime_sysinfo.go:187:19: error: use of undefined type ‘_timespec’
>   187 | type _timestruc_t _timespec
>   |   ^
> runtime_sysinfo.go:1213:21: error: use of undefined type ‘_mactun_info_s’
>  1213 | type _mactun_info_t _mactun_info_s
>   | ^
> runtime_sysinfo.go:741:13: error: use of undefined type ‘_ip6_hdr’
>   741 | type _ip6_t _ip6_hdr
>   | ^
> runtime_sysinfo.go:1079:23: error: use of undefined type ‘_zone_net_addr’
>  1079 | type _zone_net_addr_t _zone_net_addr
>   |   ^
> runtime_sysinfo.go:1127:23: error: use of undefined type ‘_flow_arp_desc_s’
>  1127 | type _flow_arp_desc_t _flow_arp_desc_s
>   |   ^
> runtime_sysinfo.go:1130:22: error: use of undefined type ‘_flow_l3_desc_s’
>  1130 | type _flow_l3_desc_t _flow_l3_desc_s
>   |  ^
> runtime_sysinfo.go:1150:20: error: use of undefined type ‘_mac_ipaddr_s’
>  1150 | ty

Re: [PATCH] Correct -fdump-go-spec's handling of incomplete types

2020-12-10 Thread Rainer Orth

Hi Nikhil,

> Sorry about this, Rainer. I think I see the issue, though it's hard to
> be certain without access to a Solaris machine. Assuming the icmp6.h
> header hasn't changed since the last time Solaris code was open source
> [0], I think the issue is likely to be typedefs that define a named
> struct and an alias for that struct in one shot. I'll start pulling on
> that thread.
>
> [0]:
> https://github.com/kofemann/opensolaris/blob/80192cd83/usr/src/uts/common/netinet/icmp6.h#L71-L74

I've just checked: <netinet/icmp6.h> is effectively unchanged since
Solaris 10.

Besides, there's gcc211 in the GCC compile farm, running Solaris 11.3/SPARC.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University

c++: modules & using-decls

2020-12-10 Thread Nathan Sidwell



This extends using-decls to modules.  In modules you can export a
using decl, but the exported decl must have external linkage already.
One thing you can do is export something from the GMF.  

The novel thing is that now 'export using foo::bar;' *in namespace
bar* can mean something significant (rather than be an obscure nop).

gcc/cp/
* name-lookup.c (do_nonmember_using_decl): Add INSERT_P parm.
Deal with exporting using decls.
(finish_nonmember_using_decl): Examine BINDING_VECTOR.

pushing to trunk
--
Nathan Sidwell
diff --git i/gcc/cp/name-lookup.c w/gcc/cp/name-lookup.c
index 9c945842fa1..7dd4efa0a85 100644
--- i/gcc/cp/name-lookup.c
+++ w/gcc/cp/name-lookup.c
@@ -4567,7 +4567,7 @@ pushdecl_outermost_localscope (tree x)
 
 static bool
 do_nonmember_using_decl (name_lookup &lookup, bool fn_scope_p,
-			 tree *value_p, tree *type_p)
+			 bool insert_p, tree *value_p, tree *type_p)
 {
   tree value = *value_p;
   tree type = *type_p;
@@ -4587,13 +4587,33 @@ do_nonmember_using_decl (name_lookup &lookup, bool fn_scope_p,
   lookup.value = NULL_TREE;
 }
 
+  /* Only process exporting if we're going to be inserting.  */
+  bool revealing_p = insert_p && !fn_scope_p && module_has_cmi_p ();
+
+  /* First do the value binding.  */
   if (!lookup.value)
-/* Nothing.  */;
+/* Nothing (only implicit typedef found).  */
+gcc_checking_assert (lookup.type);
   else if (OVL_P (lookup.value) && (!value || OVL_P (value)))
 {
   for (lkp_iterator usings (lookup.value); usings; ++usings)
 	{
 	  tree new_fn = *usings;
+	  bool exporting = revealing_p && module_exporting_p ();
+	  if (exporting)
+	{
+	  /* If the using decl is exported, the things it refers
+		 to must also be exported (or not in module purview).  */
+	  if (!DECL_MODULE_EXPORT_P (new_fn)
+		  && (DECL_LANG_SPECIFIC (new_fn)
+		  && DECL_MODULE_PURVIEW_P (new_fn)))
+		{
+		  error ("%q#D does not have external linkage", new_fn);
+		  inform (DECL_SOURCE_LOCATION (new_fn),
+			  "%q#D declared here", new_fn);
+		  exporting = false;
+		}
+	}
 
 	  /* [namespace.udecl]
 
@@ -4601,6 +4621,10 @@ do_nonmember_using_decl (name_lookup &lookup, bool fn_scope_p,
 	 scope has the same name and the same parameter types as a
 	 function introduced by a using declaration the program is
 	 ill-formed.  */
+	  /* This seems overreaching, asking core -- why do we care
+	 about decls in the namespace that we cannot name (because
+	 they are not transitively imported.  We just check the
+	 decls that are in this TU.  */
 	  bool found = false;
 	  for (ovl_iterator old (value); !found && old; ++old)
 	{
@@ -4609,8 +4633,25 @@ do_nonmember_using_decl (name_lookup &lookup, bool fn_scope_p,
 	  if (new_fn == old_fn)
 		{
 		  /* The function already exists in the current
-		 namespace.  */
+		 namespace.  We will still want to insert it if
+		 it is revealing a not-revealed thing.  */
 		  found = true;
+		  if (!revealing_p)
+		;
+		  else if (old.using_p ())
+		{
+		  if (exporting)
+			/* Update in place.  'tis ok.  */
+			OVL_EXPORT_P (old.get_using ()) = true;
+		  ;
+		}
+		  else if (DECL_MODULE_EXPORT_P (new_fn))
+		;
+		  else
+		{
+		  value = old.remove_node (value);
+		  found = false;
+		}
 		  break;
 		}
 	  else if (old.using_p ())
@@ -4634,11 +4675,11 @@ do_nonmember_using_decl (name_lookup &lookup, bool fn_scope_p,
 		}
 	}
 
-	  if (!found)
+	  if (!found && insert_p)
 	/* Unlike the decl-pushing case we don't drop anticipated
 	   builtins here.  They don't cause a problem, and we'd
 	   like to match them with a future declaration.  */
-	value = ovl_insert (new_fn, value, true);
+	value = ovl_insert (new_fn, value, 1 + exporting);
 	}
 }
   else if (value
@@ -4649,28 +4690,34 @@ do_nonmember_using_decl (name_lookup &lookup, bool fn_scope_p,
   diagnose_name_conflict (lookup.value, value);
   failed = true;
 }
-  else
+  else if (insert_p)
+// FIXME:what if we're newly exporting lookup.value
 value = lookup.value;
-
+  
+  /* Now the type binding.  */
   if (lookup.type && lookup.type != type)
 {
+  // FIXME: What if we're exporting lookup.type?
   if (type && !decls_match (lookup.type, type))
 	{
 	  diagnose_name_conflict (lookup.type, type);
 	  failed = true;
 	}
-  else
+  else if (insert_p)
 	type = lookup.type;
 }
 
-  /* If value is empty, shift any class or enumeration name back.  */
-  if (!value)
+  if (insert_p)
 {
-  value = type;
-  type = NULL_TREE;
+  /* If value is empty, shift any class or enumeration name back.  */
+  if (!value)
+	{
+	  value = type;
+	  type = NULL_TREE;
+	}
+  *value_p = value;
+  *type_p = type;
 }
-  *value_p = value;
-  *type_p = type;
 
   return failed;
 }
@@ -5506,8 +5553,10 @@ do_class_using_decl (tree scope, tree name)
 }
 
 
-/* Return the

Re: [PATCH] Correct -fdump-go-spec's handling of incomplete types

2020-12-10 Thread Nikhil Benesch via Gcc-patches


On 12/10/20 2:34 PM, Rainer Orth wrote:

I've just checked: <netinet/icmp6.h> is effectively unchanged since
Solaris 10.

Besides, there's gcc211 in the GCC compile farm, running Solaris 11.3/SPARC.


Ah, thanks, I wasn't aware there was a compile farm available to GCC
developers. I've applied for an account, but it sounds like it may take
a while to get approved.

My theory was wrong, by the way. This C snippet, representative of the
Solaris headers, expands just fine:

struct in6_addr {
union {
uint8_t __u6_addr8[16];
uint16_t__u6_addr16[8];
uint32_t__u6_addr32[4];
} __u6_addr;/* 128-bit IP6 address */
};

typedef struct icmp6_hdr {
uint8_t  icmp6_type;/* type field */
uint8_t  icmp6_code;/* code field */
uint16_t icmp6_cksum;   /* checksum field */
union {
uint32_t icmp6_un_data32[1];/* type-specific field */
uint16_t icmp6_un_data16[2];/* type-specific field */
uint8_t  icmp6_un_data8[4]; /* type-specific field */
} icmp6_dataun;
} icmp6_t;

typedef struct mld_hdr {
struct icmp6_hdrmld_icmp6_hdr;
struct in6_addr mld_addr; /* multicast address */
} mld_hdr_t;

Something else is afoot here, but I'm not sure what.

Nikhil

Re: [PATCH 1/2] libstdc++: Add --enable-stdio=stdio_pure option [v2]

2020-12-10 Thread Jonathan Wakely via Gcc-patches


On 09/12/20 18:46 -0800, Keith Packard wrote:

This option directs the library to only use simple stdio APIs instead
of using fileno to get the file descriptor for use with POSIX APIs.


This looks fine to me, even at this stage of GCC 11 (it doesn't affect
the default configurations, just adds a new one that nobody is going
to use unless they ask for it explicitly).

I'll do a bit more testing and push it next week.

Thanks for the patch!

Re: [PATCH] c++: Diagnose unstable satisfaction results

2020-12-10 Thread Jason Merrill via Gcc-patches


On 12/10/20 11:21 AM, Patrick Palka wrote:

This implements lightweight heuristical detection and diagnosing of
satisfaction results that change at different points in the program,
which renders the program as ill-formed NDR as of P2014.  We've recently
started to more aggressively cache satisfaction results, and so the goal
here is to make this caching behavior more transparent to users.

A satisfaction result is flagged as "potentially unstable" (at the atom
granularity) if during its computation, some type completion failure
occurs.  This is detected by making complete_type_or_maybe_complain
increment a counter upon failure and comparing the value of the counter
before and after satisfaction.  (We don't instrument complete_type
directly because it's used "opportunistically" in many spots where type
completion failure doesn't necessarily lead to substitution failure.)

Flagged satisfaction results are always recomputed from scratch, even
when performing satisfaction quietly.  We then compare the recomputed
result with the cached result, and if they differ, proceed with
diagnosing the instability.  (We may also unflag a result if it turned
out to be independent of the previously detected type completion
failure.)  When performing satisfaction noisily, we always check
instability.

Most of the implementation is confined to the satisfaction_cache class,
which has been completely rewritten.

Bootstrapped and regtested on x86_64-pc-linux-gnu, and also tested on
cmcstl2 and range-v3.  The static_assert failures in the view.join test
from cmcstl2 are now elaborated on after this patch, and additionally
the alg.equal_range test now fails for the same reason as the view.join
test.

gcc/cp/ChangeLog:

* constraint.cc (failed_type_completion_count): New.
(note_failed_type_completion_for_satisfaction): New.
(sat_entry::constr): Rename to ...
(sat_entry::atom): ... this.
(sat_entry::location): New member.
(sat_entry::maybe_unstable): New member.
(sat_entry::diagnose_instability): New member.
(struct sat_hasher): Adjust after the above renaming.
(get_satisfaction, save_satisfaction): Remove.
(satisfaction_cache): Rewrite completely.
(satisfy_atom): When instantiation of the parameter mapping
fails, set diagnose_instability.  Propagate location from
inst_cache.entry to cache.entry if the secondary lookup
succeeded.
(satisfy_declaration_constraints): When
failed_type_completion_count differs before and after
satisfaction, then don't cache the satisfaction result.
* cp-tree.h (note_failed_type_completion_for_satisfaction):
Declare.
* pt.c (tsubst) : Use
complete_type_or_maybe_complain instead of open-coding it.
* typeck.c (complete_type_or_maybe_complain): Call
note_failed_type_completion_for_satisfaction when type
completion fails.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/concepts-complete1.C: New test.
* g++.dg/cpp2a/concepts-complete2.C: New test.
* g++.dg/cpp2a/concepts-complete3.C: New test.
---
  gcc/cp/constraint.cc  | 283 ++
  gcc/cp/cp-tree.h  |   2 +
  gcc/cp/pt.c   |   9 +-
  gcc/cp/typeck.c   |   1 +
  .../g++.dg/cpp2a/concepts-complete1.C |  18 ++
  .../g++.dg/cpp2a/concepts-complete2.C |  23 ++
  .../g++.dg/cpp2a/concepts-complete3.C |  16 +
  7 files changed, 282 insertions(+), 70 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-complete1.C
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-complete2.C
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-complete3.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 73c038e3afe..ee702b34d01 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -2374,35 +2374,82 @@ tsubst_parameter_mapping (tree map, tree args, 
tsubst_flags_t complain, tree in_
  Constraint satisfaction
  ---*/
  
-/* Hash functions for satisfaction entries.  */

+/* A counter incremented by note_failed_type_completion_for_satisfaction().
+   It's used by the satisfaction caches in order to flag "potentially unstable"
+   satisfaction results.  */
+
+static unsigned failed_type_completion_count;
+
+/* Called whenever a type completion failure occurs that definitely affects
+   the semantics of the program, by e.g. inducing substitution failure.  */
+
+void
+note_failed_type_completion_for_satisfaction (tree type)
+{
+  gcc_checking_assert (!COMPLETE_TYPE_P (type));
+  if (CLASS_TYPE_P (type)
+  && CLASSTYPE_TEMPLATE_INSTANTIATION (type))
+/* After instantiation, an incomplete class template specialization
+   will always be incomplete, so we don't increment the counter

[pushed] c++: Add fixed test [PR91506]

2020-12-10 Thread Marek Polacek via Gcc-patches

Pre-r11-557 we issued a bogus

  error: parameter may not have variably modified type 'double [x]'

but now we compile this, as we should.

Tested x86_64-pc-linux-gnu, applying to trunk.

gcc/testsuite/ChangeLog:

PR c++/91506
* g++.dg/init/array60.C: New test.
---
 gcc/testsuite/g++.dg/init/array60.C | 8 
 1 file changed, 8 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/init/array60.C

diff --git a/gcc/testsuite/g++.dg/init/array60.C 
b/gcc/testsuite/g++.dg/init/array60.C
new file mode 100644
index 000..db5095a9bd5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/init/array60.C
@@ -0,0 +1,8 @@
+// PR c++/91506
+
+double
+test(int *arr, int x)
+{
+double ret(double(arr[x]) + 1);
+return ret;
+}

base-commit: 4f1d8bd5096cc234313b23f64cdac53a94ff75a2
-- 
2.29.2

[pushed] c++: Update value of __cplusplus for C++20.

2020-12-10 Thread Jason Merrill via Gcc-patches

It's past time to update this macro to the specified value for C++20.

libcpp/ChangeLog:

* init.c (cpp_init_builtins): Update __cplusplus for C++20.
---
 libcpp/init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcpp/init.c b/libcpp/init.c
index f77dc26a003..7987798afbe 100644
--- a/libcpp/init.c
+++ b/libcpp/init.c
@@ -542,7 +542,7 @@ cpp_init_builtins (cpp_reader *pfile, int hosted)
 {
   if (CPP_OPTION (pfile, lang) == CLK_CXX20
  || CPP_OPTION (pfile, lang) == CLK_GNUCXX20)
-   _cpp_define_builtin (pfile, "__cplusplus 201709L");
+   _cpp_define_builtin (pfile, "__cplusplus 202002L");
   else if (CPP_OPTION (pfile, lang) == CLK_CXX17
  || CPP_OPTION (pfile, lang) == CLK_GNUCXX17)
_cpp_define_builtin (pfile, "__cplusplus 201703L");

base-commit: 4f1d8bd5096cc234313b23f64cdac53a94ff75a2
-- 
2.27.0

[pushed] c++: Add make_temp_override generator functions

2020-12-10 Thread Jason Merrill via Gcc-patches

A common pattern before C++17 is the generator function, used to avoid
having to specify the type of a container element by using a function call
to get type deduction; for example, std::make_pair.  C++17 added class type
argument deduction, making generator functions unnecessary for many uses,
but GCC won't be written in C++17 for years yet.

Tested x86_64-pc-linux-gnu, applying to trunk.

gcc/cp/ChangeLog:

* cp-tree.h (struct type_identity): New.
(make_temp_override): New.
* decl.c (grokdeclarator): Use it.
* except.c (maybe_noexcept_warning): Use it.
* parser.c (cp_parser_enum_specifier): Use it.
(cp_parser_parameter_declaration_clause): Use it.
(cp_parser_gnu_attributes_opt): Use it.
(cp_parser_std_attribute): Use it.
---
 gcc/cp/cp-tree.h | 32 
 gcc/cp/decl.c|  2 +-
 gcc/cp/except.c  |  2 +-
 gcc/cp/parser.c  |  8 
 4 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 5304f6b86a2..5cd2999ca85 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -1932,6 +1932,38 @@ public:
   ~temp_override() { overridden_variable = saved_value; }
 };
 
+/* Wrapping a template parameter in type_identity_t hides it from template
+   argument deduction.  */
+#if __cpp_lib_type_identity
+using std::type_identity_t;
+#else
+template <typename T>
+struct type_identity { typedef T type; };
+template <typename T>
+using type_identity_t = typename type_identity<T>::type;
+#endif
+
+/* Object generator function for temp_override, so you don't need to write the
+   type of the object as a template argument.
+
+   Use as auto x = make_temp_override (flag); */
+
+template <typename T>
+inline temp_override<T>
+make_temp_override (T& var)
+{
+  return { var };
+}
+
+/* Likewise, but use as auto x = make_temp_override (flag, value); */
+
+template <typename T>
+inline temp_override<T>
+make_temp_override (T& var, type_identity_t<T> overrider)
+{
+  return { var, overrider };
+}
+
 /* The cached class binding level, from the most recently exited
class, or NULL if none.  */
 
diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index ae93fe1d7f0..b56eb113fd6 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -11513,7 +11513,7 @@ grokdeclarator (const cp_declarator *declarator,
 
   /* An object declared as __attribute__((deprecated)) suppresses
  warnings of uses of other deprecated items.  */
-  temp_override ds (deprecated_state);
+  auto ds = make_temp_override (deprecated_state);
   if (attrlist && lookup_attribute ("deprecated", *attrlist))
 deprecated_state = DEPRECATED_SUPPRESS;
 
diff --git a/gcc/cp/except.c b/gcc/cp/except.c
index 0f6c76b9892..e76ade2f925 100644
--- a/gcc/cp/except.c
+++ b/gcc/cp/except.c
@@ -1101,7 +1101,7 @@ maybe_noexcept_warning (tree fn)
   && (!DECL_IN_SYSTEM_HEADER (fn)
  || global_dc->dc_warn_system_headers))
 {
-  temp_override s (global_dc->dc_warn_system_headers, true);
+  auto s = make_temp_override (global_dc->dc_warn_system_headers, true);
   auto_diagnostic_group d;
       if (warning (OPT_Wnoexcept, "noexcept-expression evaluates to %<false%> "
   "because of a call to %qD", fn))
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 39957d4b6a9..7ea8c28830e 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -19732,7 +19732,7 @@ cp_parser_enum_specifier (cp_parser* parser)
   bool is_unnamed = false;
   tree underlying_type = NULL_TREE;
   cp_token *type_start_token = NULL;
-  temp_override cleanup (parser->colon_corrects_to_scope_p, false);
+  auto cleanup = make_temp_override (parser->colon_corrects_to_scope_p, false);
 
   /* Parse tentatively so that we can back up if we don't find a
  enum-specifier.  */
@@ -23381,7 +23381,7 @@ cp_parser_parameter_declaration_clause (cp_parser* 
parser,
   cp_token *token;
   bool ellipsis_p;
 
-  temp_override cleanup
+  auto cleanup = make_temp_override
 (parser->auto_is_implicit_function_template_parm_p);
 
   if (!processing_specialization
@@ -27488,7 +27488,7 @@ cp_parser_gnu_attributes_opt (cp_parser* parser)
 {
   tree attributes = NULL_TREE;
 
-  temp_override cleanup
+  auto cleanup = make_temp_override
 (parser->auto_is_implicit_function_template_parm_p, false);
 
   while (true)
@@ -27688,7 +27688,7 @@ cp_parser_std_attribute (cp_parser *parser, tree 
attr_ns)
   tree attribute, attr_id = NULL_TREE, arguments;
   cp_token *token;
 
-  temp_override cleanup
+  auto cleanup = make_temp_override
 (parser->auto_is_implicit_function_template_parm_p, false);
 
   /* First, parse name of the attribute, a.k.a attribute-token.  */

base-commit: 445430e16bd08ade34637d2346ded40dd49de508
-- 
2.27.0

Re: [PATCH 1/2] libstdc++: Add --enable-stdio=stdio_pure option [v2]

2020-12-10 Thread Keith Packard via Gcc-patches

Jonathan Wakely  writes:

> I'll do a bit more testing and push it next week.

That's awesome news. Thanks so much for your help; I'm looking forward to
having real C++ support for my embedded customers!

-- 
-keith


signature.asc
Description: PGP signature

c++: name lookup API for modules

2020-12-10 Thread Nathan Sidwell



This adds a set of calls to name lookup that are needed by modules.
Generally installing imported bindings, or walking the current TU's
bindings.  One note about template instantiations though.  When we're
about to instantiate a template we have to know about all the
maybe-partial specializations that exist.  These can be in any
imported module -- not necessarily the module defining the template.
Thus we key such foreign templates to the innermost namespace and
identifier of the containing entity -- that's the only thing we have
a handle on.  That's why we note and load pending specializations here.

gcc/cp/
* module.cc (lazy_specializations_p): Stub.
* name-lookup.h (append_imported_binding_slot)
(mergeable_namespace_slots, lookup_class_binding)
(walk_module_binding, import_module_binding, set_module_binding)
(note_pending_specializations, load_pending_specializations)
(add_module_decl, add_imported_namespace): Declare.
(get_cxx_dialect_name): Declare.
(enum WMB_flags): New.
* name-lookup.c (append_imported_binding_slot)
(mergeable_namespace_slots, lookup_class_binding)
(walk_module_binding, import_module_binding, set_module_binding)
(note_pending_specializations, load_pending_specializations)
(add_module_decl, add_imported_namespace): New.
(get_cxx_dialect_name): Make extern.

pushing to trunk

--
Nathan Sidwell
diff --git i/gcc/cp/module.cc w/gcc/cp/module.cc
index c98df14c45e..11eb6dabb04 100644
--- i/gcc/cp/module.cc
+++ w/gcc/cp/module.cc
@@ -156,6 +156,12 @@ lazy_load_members (tree)
 {
 }
 
+bool
+lazy_specializations_p (unsigned, bool, bool)
+{
+  return false;
+}
+
 bitmap
 visible_instantiation_path (bitmap *)
 {
diff --git i/gcc/cp/name-lookup.c w/gcc/cp/name-lookup.c
index 7dd4efa0a85..03d2bc5984b 100644
--- i/gcc/cp/name-lookup.c
+++ w/gcc/cp/name-lookup.c
@@ -324,6 +324,55 @@ get_fixed_binding_slot (tree *slot, tree name, unsigned ix, int create)
   return reinterpret_cast (&cluster.slots[off]);
 }
 
+/* *SLOT is a namespace binding slot.  Append a slot for imported
+   module IX.  */
+
+static binding_slot *
+append_imported_binding_slot (tree *slot, tree name, unsigned ix)
+{
+  gcc_checking_assert (ix);
+
+  if (!*slot ||  TREE_CODE (*slot) != BINDING_VECTOR)
+/* Make an initial module vector.  */
+get_fixed_binding_slot (slot, name, BINDING_SLOT_GLOBAL, -1);
+  else if (!BINDING_VECTOR_CLUSTER_LAST (*slot)
+	   ->indices[BINDING_VECTOR_SLOTS_PER_CLUSTER - 1].span)
+/* There is space in the last cluster.  */;
+  else if (BINDING_VECTOR_NUM_CLUSTERS (*slot)
+	   != BINDING_VECTOR_ALLOC_CLUSTERS (*slot))
+/* There is space in the vector.  */
+BINDING_VECTOR_NUM_CLUSTERS (*slot)++;
+  else
+{
+  /* Extend the vector.  */
+  unsigned have = BINDING_VECTOR_NUM_CLUSTERS (*slot);
+  unsigned want = (have * 3 + 1) / 2;
+
+  if (want > (unsigned short)~0)
+	want = (unsigned short)~0;
+
+  tree new_vec = make_binding_vec (name, want);
+  BINDING_VECTOR_NUM_CLUSTERS (new_vec) = have + 1;
+  memcpy (BINDING_VECTOR_CLUSTER_BASE (new_vec),
+	  BINDING_VECTOR_CLUSTER_BASE (*slot),
+	  have * sizeof (binding_cluster));
+  *slot = new_vec;
+}
+
+  binding_cluster *last = BINDING_VECTOR_CLUSTER_LAST (*slot);
+  for (unsigned off = 0; off != BINDING_VECTOR_SLOTS_PER_CLUSTER; off++)
+if (!last->indices[off].span)
+  {
+	/* Fill the free slot of the cluster.  */
+	last->indices[off].base = ix;
+	last->indices[off].span = 1;
+	last->slots[off] = NULL_TREE;
+	return &last->slots[off];
+  }
+
+  gcc_unreachable ();
+}
+
 /* Add DECL to the list of things declared in binding level B.  */
 
 static void
@@ -3835,6 +3884,23 @@ pushdecl (tree x, bool hiding)
   return ret;
 }
 
+/* A mergeable entity is being loaded into namespace NS slot NAME.
+   Create and return the appropriate vector slot for that.  Either a
+   GMF slot or a module-specific one.  */
+
+tree *
+mergeable_namespace_slots (tree ns, tree name, bool is_global, tree *vec)
+{
+  tree *mslot = find_namespace_slot (ns, name, true);
+  tree *vslot = get_fixed_binding_slot
+(mslot, name, is_global ? BINDING_SLOT_GLOBAL : BINDING_SLOT_PARTITION, true);
+
+  gcc_checking_assert (TREE_CODE (*mslot) == BINDING_VECTOR);
+  *vec = *mslot;
+
+  return vslot;
+}
+
 /* DECL is a new mergeable namespace-scope decl.  Add it to the
mergeable entities on GSLOT.  */
 
@@ -3844,6 +3910,286 @@ add_mergeable_namespace_entity (tree *gslot, tree decl)
   *gslot = ovl_make (decl, *gslot);
 }
 
+/* A mergeable entity of KLASS called NAME is being loaded.  Return
+   the set of things it could be.  All such non-as_base classes have
+   been given a member vec.  */
+
+tree
+lookup_class_binding (tree klass, tree name)
+{
+  tree found = NULL_TREE;
+
+  if (!COMPLETE_TYPE_P (klass))
+;
+  else if (TYPE_LANG_SPECIFIC (klass))
+{
+  vec *member_vec = CLASSTYPE_M

[PATCH] x86: Update user interrupt handler stack frame

2020-12-10 Thread H.J. Lu via Gcc-patches

User interrupt handler stack frame is similar to exception interrupt
handler stack frame.  Instead of error code, the second argument is
user interrupt request register vector.

gcc/

PR target/98219
* config/i386/uintrintrin.h (__uintr_frame): Remove uirrv.

gcc/testsuite/

PR target/98219
* gcc.dg/guality/pr98219-1.c: New test.
* gcc.dg/guality/pr98219-2.c: Likewise.
* gcc.dg/torture/pr98219-1.c: Likewise.
* gcc.dg/torture/pr98219-2.c: Likewise.
* gcc.target/i386/uintr-2.c: Scan "add[lq] $8, %[er]sp".
(foo): Add an unsigned long long argument.
(UINTR_hanlder): Likewise.
* gcc.target/i386/uintr-3.c: Scan "add[lq] $8, %[er]sp".
(UINTR_hanlder): Add an unsigned long long argument.
* gcc.target/i386/uintr-4.c (UINTR_hanlder): Likewise.
* gcc.target/i386/uintr-5.c (UINTR_hanlder): Likewise.
---
 gcc/config/i386/uintrintrin.h|  3 --
 gcc/testsuite/gcc.dg/guality/pr98219-1.c | 48 ++
 gcc/testsuite/gcc.dg/guality/pr98219-2.c | 63 
 gcc/testsuite/gcc.dg/torture/pr98219-1.c | 44 +
 gcc/testsuite/gcc.dg/torture/pr98219-2.c | 59 ++
 gcc/testsuite/gcc.target/i386/uintr-2.c  |  5 +-
 gcc/testsuite/gcc.target/i386/uintr-3.c  |  4 +-
 gcc/testsuite/gcc.target/i386/uintr-4.c  |  4 +-
 gcc/testsuite/gcc.target/i386/uintr-5.c  |  2 +-
 9 files changed, 223 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/guality/pr98219-1.c
 create mode 100644 gcc/testsuite/gcc.dg/guality/pr98219-2.c
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr98219-1.c
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr98219-2.c

diff --git a/gcc/config/i386/uintrintrin.h b/gcc/config/i386/uintrintrin.h
index 991f6427971..4606caf8582 100644
--- a/gcc/config/i386/uintrintrin.h
+++ b/gcc/config/i386/uintrintrin.h
@@ -38,9 +38,6 @@
 
 struct __uintr_frame
 {
-  /* The position of the most significant bit set in user-interrupt
- request register.  */
-  unsigned long long uirrv;
   /* RIP of the interrupted user process.  */
   unsigned long long rip;
   /* RFLAGS of the interrupted user process.  */
diff --git a/gcc/testsuite/gcc.dg/guality/pr98219-1.c 
b/gcc/testsuite/gcc.dg/guality/pr98219-1.c
new file mode 100644
index 000..8d695080fd8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/guality/pr98219-1.c
@@ -0,0 +1,48 @@
+/* { dg-do run { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } */
+/* { dg-options "-g -muintr -mgeneral-regs-only" } */
+
+#include <x86gprintrin.h>
+
+extern void exit (int);
+
+#define UIRRV  0x12345670
+#define RIP0x12345671
+#define RFLAGS 0x12345672
+#define RSP0x12345673
+
+#define STRING(x)  XSTRING(x)
+#define XSTRING(x) #x
+#define ASMNAME(cname)  ASMNAME2 (__USER_LABEL_PREFIX__, cname)
+#define ASMNAME2(prefix, cname) XSTRING (prefix) cname
+
+__attribute__((interrupt, used))
+void
+fn (struct __uintr_frame *frame, unsigned long long uirrv)
+{
+  if (UIRRV != uirrv)  /* BREAK */
+__builtin_abort ();
+  if (RIP != frame->rip)
+__builtin_abort ();
+  if (RFLAGS != frame->rflags)
+__builtin_abort ();
+  if (RSP != frame->rsp)
+__builtin_abort ();
+
+  exit (0);
+}
+
+int
+main ()
+{
+  asm ("push   $" STRING (RSP) ";  \
+   push$" STRING (RFLAGS) ";   \
+   push$" STRING (RIP) ";  \
+   push$" STRING (UIRRV) ";\
+   jmp " ASMNAME ("fn"));
+  return 0;
+}
+
+/* { dg-final { gdb-test 22 "uirrv" "0x12345670" } } */
+/* { dg-final { gdb-test 22 "frame->rip" "0x12345671" } } */
+/* { dg-final { gdb-test 22 "frame->rflags" "0x12345672" } } */
+/* { dg-final { gdb-test 22 "frame->rsp" "0x12345673" } } */
diff --git a/gcc/testsuite/gcc.dg/guality/pr98219-2.c 
b/gcc/testsuite/gcc.dg/guality/pr98219-2.c
new file mode 100644
index 000..c0e48c981de
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/guality/pr98219-2.c
@@ -0,0 +1,63 @@
+/* { dg-do run { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } */
+/* { dg-options "-g -muintr -mgeneral-regs-only" } */
+
+#include 
+
+extern void exit (int);
+typedef int aligned __attribute__((aligned(64)));
+
+#define UIRRV  0x12345670
+#define RIP0x12345671
+#define RFLAGS 0x12345672
+#define RSP0x12345673
+
+#define STRING(x)  XSTRING(x)
+#define XSTRING(x) #x
+#define ASMNAME(cname)  ASMNAME2 (__USER_LABEL_PREFIX__, cname)
+#define ASMNAME2(prefix, cname) XSTRING (prefix) cname
+
+int
+check_int (int *i, int align)
+{
+  *i = 20;
+  if ((((ptrdiff_t) i) & (align - 1)) != 0)
+__builtin_abort ();
+  return *i;
+}
+
+__attribute__((interrupt, used))
+__attribute__((interrupt, used))
+void
+fn (struct __uintr_frame *frame, unsigned long long uirrv)
+{
+  aligned i;
+  if (check_int (&i, __alignof__(i)) != i)
+__builtin_abort ();
+
+  if (UIRRV != uirrv)  /* BREAK */

Re: [PATCH] PowerPC: Map IEEE 128-bit long double built-in functions

2020-12-10 Thread Segher Boessenkool

Hi!

On Thu, Nov 19, 2020 at 06:58:14PM -0500, Michael Meissner wrote:
>   * config/rs6000/rs6000.c (rs6000_mangle_decl_assembler_name): Add
>   support for mapping built-in function names for long double
>   built-in functions if long double is IEEE 128-bit.

Please write what it does, not "add support".  Say what names it maps
to, importantly.  You don't need to list all, but what you wrote is
100% contentless.

> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index a5188553593..35e9c844e17 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -27065,57 +27065,128 @@ rs6000_globalize_decl_name (FILE * stream, tree 
> decl)
> library before you can switch the real*16 type at compile time.
>  
> We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name.  We
> -   only do this if the default is that long double is IBM extended double, 
> and
> -   the user asked for IEEE 128-bit.  */
> +   only do this transformation if the __float128 type is enabled.  This
> +   prevents us from doing the transformation on older 32-bit ports that might
> +   have enabled using IEEE 128-bit floating point as the default long double
> +   type.  */

I still don't understand why you want to support some hypothetical and
untested configuration.

> +  /* { dg-final { scan-assembler {\mbl __ynieee128} } }  */

This kind of thing does not portably work (the function names can have
various prefixes added).

I cannot understand this code, and it does seem far from obviously
correct.  But, okay for trunk if you handle all fallout (and I mean all,
not just "all you consider important").

Segher

Re: [PATCH] c++: Diagnose unstable satisfaction results

2020-12-10 Thread Patrick Palka via Gcc-patches

On Thu, 10 Dec 2020, Jason Merrill wrote:

> On 12/10/20 11:21 AM, Patrick Palka wrote:
> > This implements lightweight heuristical detection and diagnosing of
> > satisfaction results that change at different points in the program,
> > which renders the program as ill-formed NDR as of P2014.  We've recently
> > started to more aggressively cache satisfaction results, and so the goal
> > here is to make this caching behavior more transparent to users.
> > 
> > A satisfaction result is flagged as "potentially unstable" (at the atom
> > granularity) if during its computation, some type completion failure
> > occurs.  This is detected by making complete_type_or_maybe_complain
> > increment a counter upon failure and comparing the value of the counter
> > before and after satisfaction.  (We don't instrument complete_type
> > directly because it's used "opportunistically" in many spots where type
> > completion failure doesn't necessarily lead to substitution failure.)
> > 
> > Flagged satisfaction results are always recomputed from scratch, even
> > when performing satisfaction quietly.  We then compare the recomputed
> > result with the cached result, and if they differ, proceed with
> > diagnosing the instability.  (We may also unflag a result if it turned
> > out to be independent of the previously detected type completion
> > failure.)  When performing satisfaction noisily, we always check
> > instability.
> > 
> > Most of the implementation is confined to the satisfaction_cache class,
> > which has been completely rewritten.
> > 
> > Bootstrapped and regtested on x86_64-pc-linux-gnu, and also tested on
> > cmcstl2 and range-v3.  The static_assert failures in the view.join test
> > from cmcstl2 are now elaborated on after this patch, and additionally
> > the alg.equal_range test now fails for the same reason as the view.join
> > test.
> > 
> > gcc/cp/ChangeLog:
> > 
> > * constraint.cc (failed_type_completion_count): New.
> > (note_failed_type_completion_for_satisfaction): New.
> > (sat_entry::constr): Rename to ...
> > (sat_entry::atom): ... this.
> > (sat_entry::location): New member.
> > (sat_entry::maybe_unstable): New member.
> > (sat_entry::diagnose_instability): New member.
> > (struct sat_hasher): Adjust after the above renaming.
> > (get_satisfaction, save_satisfaction): Remove.
> > (satisfaction_cache): Rewrite completely.
> > (satisfy_atom): When instantiation of the parameter mapping
> > fails, set diagnose_instability.  Propagate location from
> > inst_cache.entry to cache.entry if the secondary lookup
> > succeeded.
> > (satisfy_declaration_constraints): When
> > failed_type_completion_count differs before and after
> > satisfaction, then don't cache the satisfaction result.
> > * cp-tree.h (note_failed_type_completion_for_satisfaction):
> > Declare.
> > * pt.c (tsubst) : Use
> > complete_type_or_maybe_complain instead of open-coding it.
> > * typeck.c (complete_type_or_maybe_complain): Call
> > note_failed_type_completion_for_satisfaction when type
> > completion fails.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > * g++.dg/cpp2a/concepts-complete1.C: New test.
> > * g++.dg/cpp2a/concepts-complete2.C: New test.
> > * g++.dg/cpp2a/concepts-complete3.C: New test.
> > ---
> >   gcc/cp/constraint.cc  | 283 ++
> >   gcc/cp/cp-tree.h  |   2 +
> >   gcc/cp/pt.c   |   9 +-
> >   gcc/cp/typeck.c   |   1 +
> >   .../g++.dg/cpp2a/concepts-complete1.C |  18 ++
> >   .../g++.dg/cpp2a/concepts-complete2.C |  23 ++
> >   .../g++.dg/cpp2a/concepts-complete3.C |  16 +
> >   7 files changed, 282 insertions(+), 70 deletions(-)
> >   create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-complete1.C
> >   create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-complete2.C
> >   create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-complete3.C
> > 
> > diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
> > index 73c038e3afe..ee702b34d01 100644
> > --- a/gcc/cp/constraint.cc
> > +++ b/gcc/cp/constraint.cc
> > @@ -2374,35 +2374,82 @@ tsubst_parameter_mapping (tree map, tree args,
> > tsubst_flags_t complain, tree in_
> >   Constraint satisfaction
> >   
> > ---*/
> >   -/* Hash functions for satisfaction entries.  */
> > +/* A counter incremented by note_failed_type_completion_for_satisfaction().
> > +   It's used by the satisfaction caches in order to flag "potentially
> > unstable"
> > +   satisfaction results.  */
> > +
> > +static unsigned failed_type_completion_count;
> > +
> > +/* Called whenever a type completion failure occurs that definitely affects
> > +   the semantics of the program, by e.g. inducing substitution failure.  */
> > +
> > +void
> > +note_

[Ada] Fix PR ada/98230

2020-12-10 Thread Eric Botcazou

It's a rather curious malfunction of the 'Mod attribute applied to the 
variable of a loop whose upper bound is dynamic.

Tested on x86-64/Linux, applied on all active branches.


2020-12-10  Ed Schonberg  

PR ada/98230
* exp_attr.adb (Expand_N_Attribute_Reference, case Mod): Use base
type of argument to obtain static bound and required size.


2020-12-10  Eric Botcazou  

* gnat.dg/modular6.adb: New test.

-- 
Eric Botcazou
diff --git a/gcc/ada/exp_attr.adb b/gcc/ada/exp_attr.adb
index 251fa1449c4..b21592c78b2 100644
--- a/gcc/ada/exp_attr.adb
+++ b/gcc/ada/exp_attr.adb
@@ -4702,13 +4702,15 @@ package body Exp_Attr is
 
   when Attribute_Mod => Mod_Case : declare
  Arg  : constant Node_Id := Relocate_Node (First (Exprs));
- Hi   : constant Node_Id := Type_High_Bound (Etype (Arg));
+ Hi   : constant Node_Id := Type_High_Bound (Base_Type (Etype (Arg)));
  Modv : constant Uint:= Modulus (Btyp);
 
   begin
 
  --  This is not so simple. The issue is what type to use for the
- --  computation of the modular value.
+ --  computation of the modular value. In addition we need to use
+ --  the base type as above to retrieve a static bound for the
+ --  comparisons that follow.
 
  --  The easy case is when the modulus value is within the bounds
  --  of the signed integer type of the argument. In this case we can
-- { dg-do compile }

with Ada.Text_IO; use Ada.Text_IO;

procedure Modular6 is
   Max : Integer := 0;
   
   type Modulus is mod 3;
begin
   Max := 30;
   
   for N in 1 .. Max loop
  Put_Line("N: " & Integer'Image(N) & " Modulus:" & Integer'Image(Modulus'Modulus) & " Mod:" & Modulus'Image(Modulus'Mod(N)));
   end loop;
end;

[PATCH, rs6000] Update "prefix" attribute for Power10

2020-12-10 Thread Pat Haugen via Gcc-patches

Update prefixed attribute for Power10.


This patch was broken out from my larger patch to update various attributes for
Power10, in order to make the review process hopefully easier. This patch only
updates the prefix attribute for various new instructions. Changes in this
version include missed updates to rs6000_insn_cost and
rs6000_adjust_insn_length. I stayed with the new 'always' keyword but added
additional commentary, so hopefully it is clearer.

Bootstrap/regtest on powerpc64le (Power8/Power10) with no new regressions. Ok 
for trunk?

-Pat


2020-11-10  Pat Haugen  

gcc/
* config/rs6000/altivec.md (xxspltiw_v4si, xxspltiw_v4sf_inst,
xxspltidp_v2df_inst, xxsplti32dx_v4si_inst, xxsplti32dx_v4sf_inst,
xxblend_, xxpermx_inst, xxeval): Mark prefixed "always".
* config/rs6000/mma.md (mma_, mma_,
mma_, mma_, mma_, mma_,
mma_, mma_, mma_, mma_):
Likewise.
* config/rs6000/rs6000.c (rs6000_insn_cost): Update test for prefixed
insn.
(next_insn_prefixed_p): Rename to prefix_next_insn_p.
(rs6000_final_prescan_insn): Only add 'p' for PREFIXED_YES.
(rs6000_asm_output_opcode): Adjust.
(rs6000_adjust_insn_length): Update test for prefixed insns.
* config/rs6000/rs6000.md (define_attr "prefixed"): Add 'always'
and update commentary.
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 6a6ce0f84ed..fc926f7a7aa 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -828,7 +828,8 @@ (define_insn "xxspltiw_v4si"
 UNSPEC_XXSPLTIW))]
  "TARGET_POWER10"
  "xxspltiw %x0,%1"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+  (set_attr "prefixed" "always")])
 
 (define_expand "xxspltiw_v4sf"
   [(set (match_operand:V4SF 0 "register_operand" "=wa")
@@ -847,7 +848,8 @@ (define_insn "xxspltiw_v4sf_inst"
 UNSPEC_XXSPLTIW))]
  "TARGET_POWER10"
  "xxspltiw %x0,%1"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+  (set_attr "prefixed" "always")])
 
 (define_expand "xxspltidp_v2df"
   [(set (match_operand:V2DF 0 "register_operand" )
@@ -866,7 +868,8 @@ (define_insn "xxspltidp_v2df_inst"
 UNSPEC_XXSPLTID))]
   "TARGET_POWER10"
   "xxspltidp %x0,%1"
-  [(set_attr "type" "vecsimple")])
+  [(set_attr "type" "vecsimple")
+   (set_attr "prefixed" "always")])
 
 (define_expand "xxsplti32dx_v4si"
   [(set (match_operand:V4SI 0 "register_operand" "=wa")
@@ -895,7 +898,8 @@ (define_insn "xxsplti32dx_v4si_inst"
 UNSPEC_XXSPLTI32DX))]
   "TARGET_POWER10"
   "xxsplti32dx %x0,%2,%3"
-  [(set_attr "type" "vecsimple")])
+  [(set_attr "type" "vecsimple")
+   (set_attr "prefixed" "always")])
 
 (define_expand "xxsplti32dx_v4sf"
   [(set (match_operand:V4SF 0 "register_operand" "=wa")
@@ -923,7 +927,8 @@ (define_insn "xxsplti32dx_v4sf_inst"
 UNSPEC_XXSPLTI32DX))]
   "TARGET_POWER10"
   "xxsplti32dx %x0,%2,%3"
-  [(set_attr "type" "vecsimple")])
+  [(set_attr "type" "vecsimple")
+   (set_attr "prefixed" "always")])
 
 (define_insn "xxblend_"
   [(set (match_operand:VM3 0 "register_operand" "=wa")
@@ -933,7 +938,8 @@ (define_insn "xxblend_"
UNSPEC_XXBLEND))]
   "TARGET_POWER10"
   "xxblendv %x0,%x1,%x2,%x3"
-  [(set_attr "type" "vecsimple")])
+  [(set_attr "type" "vecsimple")
+   (set_attr "prefixed" "always")])
 
 (define_expand "xxpermx"
   [(set (match_operand:V2DI 0 "register_operand" "+wa")
@@ -977,7 +983,8 @@ (define_insn "xxpermx_inst"
 UNSPEC_XXPERMX))]
   "TARGET_POWER10"
   "xxpermx %x0,%x1,%x2,%x3,%4"
-  [(set_attr "type" "vecsimple")])
+  [(set_attr "type" "vecsimple")
+   (set_attr "prefixed" "always")])
 
 (define_expand "vstrir_"
   [(set (match_operand:VIshort 0 "altivec_register_operand")
@@ -3625,7 +3632,8 @@ (define_insn "xxeval"
 UNSPEC_XXEVAL))]
"TARGET_POWER10"
"xxeval %0,%1,%2,%3,%4"
-   [(set_attr "type" "vecsimple")])
+   [(set_attr "type" "vecsimple")
+(set_attr "prefixed" "always")])
 
 (define_expand "vec_unpacku_hi_v16qi"
   [(set (match_operand:V8HI 0 "register_operand" "=v")
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 4d291c42f7a..a87a1eb132c 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -540,6 +540,7 @@ (define_insn "mma_"
   "TARGET_MMA"
   " %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
+   (set_attr "prefixed" "always")
(set_attr "length" "8")])
 
 (define_insn "mma_"
@@ -554,6 +555,7 @@ (define_insn "mma_"
   "TARGET_MMA"
   " %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
+   (set_attr "prefixed" "always")
(set_attr "length" "8")])
 
 (define_insn "mma_"
@@ -567,6 +569,7 @@ (define_insn "mma_"
   "TARGET_MMA"
   " %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
+   (set_attr "prefixed" "always")
(set_attr "length" "8")])
 
 (define_insn "mma_"
@@ -581,6 +584,7 @@ (defi

[patch] [PR tree-optimization/98174] Reduce memory requirements for ranger

2020-12-10 Thread Andrew MacLeod via Gcc-patches


With very large CFGs, ranger's on-entry cache is not particularly efficient.

One thing I never got to was recognizing that if an ssa-name is never 
used in an outgoing edge calculation, then its range never changes; the 
global range is sufficient and we do not need to propagate the on-entry 
cache with it.  Many SSA_NAMES fall into this category.


This patch implements this by preprocessing the exit of each block 
(which would be done anyway, we just do it up front now) and keeping a 
cumulative map of anything which is exported.   Anything without the bit 
set is a "pure global" which means nothing ever changes the range.


Then when it comes time to populate the on-entry cache, we just bail if 
it's a pure global.


This seems to resolve most of the memory issues I am seeing. One side 
effect is ranger now doesn't fully propagate the non-null property which 
is a product of statement side effects.  This is the only thing in 
ranger which can affect a range but isn't a block export.   This won't 
impact the hybrid EVRP model since EVRP continues to track this 
information.   I will keep an eye on whether this has any impact on the 
other couple of passes which are using ranger.  If necessary, we can 
easily make this a hybrid-only mode, but for this release it may 
not be needed.


Next release, I plan to handle the non-null processing in a completely 
different way anyway when we add the facility to query for general 
statement side effects, so this is in fact a future-compatible change.


There is still a time increase issue in pr91257 that I am continuing to 
look at to see if there is anything further to be done;  it seems mostly 
related to PHIs and the new edge processing. At least this seems to 
resolve the excessive memory issues in the specified PRs.


I will also continue to monitor if we need to re-enable anything for the 
non-null processing outside of hybrid.


Bootstrapped on x86_64-pc-linux-gnu, no regressions , pushed.

Andrew


commit 7f359556a772e26eabf8d31e53aae1de6f2f200d
Author: Andrew MacLeod 
Date:   Thu Dec 10 14:59:14 2020 -0500

Reduce memory requirements for ranger

Calculate block exit info upfront, and then any SSA_NAME which is never
used in an outgoing range calculation is a pure global and can bypass the
on-entry cache.

PR tree-optimization/98174
* gimple-range-cache.cc (ranger_cache::ssa_range_in_bb): Only push
poor values to be examined if it isn't a pure global.
(ranger_cache::block_range): Don't process pure globals.
(ranger_cache::fill_block_cache): Adjust has_edge_range call.
* gimple-range-gori.cc (gori_map::all_outgoing): New bitmap.
(gori_map::gori_map): Allocate all_outgoing.
(gori_map::is_export_p): No specified BB returns global context.
(gori_map::calculate_gori): Accumulate each block into global.
(gori_compute::gori_compute): Preprocess each block for exports.
(gori_compute::has_edge_range_p): No edge returns global context.
* gimple-range-gori.h (has_edge_range_p): Provide default parameter.

diff --git a/gcc/gimple-range-cache.cc b/gcc/gimple-range-cache.cc
index b01563c83f9..edebad45a50 100644
--- a/gcc/gimple-range-cache.cc
+++ b/gcc/gimple-range-cache.cc
@@ -779,8 +779,10 @@ ranger_cache::ssa_range_in_bb (irange &r, tree name, 
basic_block bb)
   // Look for the on-entry value of name in BB from the cache.
   else if (!m_on_entry.get_bb_range (r, name, bb))
 {
-  // If it has no entry then mark this as a poor value.
-  if (push_poor_value (bb, name))
+  // If it has no entry but should, then mark this as a poor value.
+  // Its not a poor value if it does not have *any* edge ranges,
+  // Then global range is as good as it gets.
+  if (has_edge_range_p (name) && push_poor_value (bb, name))
{
  if (DEBUG_RANGE_CACHE)
{
@@ -812,6 +814,11 @@ ranger_cache::block_range (irange &r, basic_block bb, tree 
name, bool calc)
 {
   gcc_checking_assert (gimple_range_ssa_p (name));
 
+  // If there are no range calculations anywhere in the IL, global range
+  // applies everywhere, so don't bother caching it.
+  if (!has_edge_range_p (name))
+return false;
+
   if (calc)
 {
   gimple *def_stmt = SSA_NAME_DEF_STMT (name);
@@ -1072,7 +1079,7 @@ ranger_cache::fill_block_cache (tree name, basic_block 
bb, basic_block def_bb)
{
  if (DEBUG_RANGE_CACHE)
fprintf (dump_file, "has cache, ");
- if (!r.undefined_p () || has_edge_range_p (e, name))
+ if (!r.undefined_p () || has_edge_range_p (name, e))
{
  add_to_update (node);
  if (DEBUG_RANGE_CACHE)
diff --git a/gcc/gimple-range-gori.cc b/gcc/gimple-range-gori.cc
index af3609e414e..ac13718f7e6 100644
--- a/gcc/gimple-range-gori.cc
+++ b/gcc/gimple-range-gori.cc
@@ -229,17 +229

Re: [PATCH] Correct -fdump-go-spec's handling of incomplete types

2020-12-10 Thread Rainer Orth

Hi Nikhil,

> On 12/10/20 2:34 PM, Rainer Orth wrote:
>> I've just checked:  is effectively unchanged since
>> Solaris 10.
>>
>> Besides, there's gcc211 in the GCC compile farm, running Solaris 11.3/SPARC.
>
> Ah, thanks, I wasn't aware there was a compile farm available to GCC
> developers. I've applied for an account, but it sounds like it may take
> a while to get approved.

it depends: sometimes they're very quick, at others it takes several
reminders.

> My theory was wrong, by the way. This C snippet, representative of the
> Solaris headers, expands just fine:
[...]
> Something else is afoot here, but I'm not sure what.

I'm attaching the -save-temps output, so you can work on the real data
rather than trying to figure things out from the Illumos repos.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University




sysinfo.i.bz2
Description: Binary data

Re: [PATCH] ira.c: Fix ICE in ira-color [PR97092]

2020-12-10 Thread Vladimir Makarov via Gcc-patches




On 2020-12-10 10:12 a.m., Andrea Corallo wrote:

Hi all,

following discussion on PR97092 I'd like to submit the following patch
with a fix plus associated testcase.

With this patch applied, mode is recomputed at each iteration while
looping across different copies in 'update_costs_from_allocno', instead
of being carried over to subsequent iterations.

bootstrapped and regtested on aarch64-unknown-linux-gnu.

Okay for trunk?


Yes.

The patch is safe.  Thank you for working on the PR.

Re: [PATCH] x86: Update user interrupt handler stack frame

2020-12-10 Thread Uros Bizjak via Gcc-patches

On Thu, Dec 10, 2020 at 10:20 PM H.J. Lu  wrote:
>
> User interrupt handler stack frame is similar to exception interrupt
> handler stack frame.  Instead of error code, the second argument is
> user interrupt request register vector.
>
> gcc/
>
> PR target/98219
> * config/i386/uintrintrin.h (__uintr_frame): Remove uirrv.
>
> gcc/testsuite/
>
> PR target/98219
> * gcc.dg/guality/pr98219-1.c: New test.
> * gcc.dg/guality/pr98219-2.c: Likewise.
> * gcc.dg/torture/pr98219-1.c: Likewise.
> * gcc.dg/torture/pr98219-2.c: Likewise.
> * gcc.target/i386/uintr-2.c: Scan "add[lq] $8, %[er]sp".
> (foo): Add an unsigned long long argument.
> (UINTR_hanlder): Likewise.
> * gcc.target/i386/uintr-3.c: Scan "add[lq] $8, %[er]sp".
> (UINTR_hanlder): Add an unsigned long long argument.
> * gcc.target/i386/uintr-4.c (UINTR_hanlder): Likewise.
> * gcc.target/i386/uintr-5.c (UINTR_hanlder): Likewise.

OK with the fixes, described inline.

Thanks,
Uros.

> ---
>  gcc/config/i386/uintrintrin.h|  3 --
>  gcc/testsuite/gcc.dg/guality/pr98219-1.c | 48 ++
>  gcc/testsuite/gcc.dg/guality/pr98219-2.c | 63 
>  gcc/testsuite/gcc.dg/torture/pr98219-1.c | 44 +
>  gcc/testsuite/gcc.dg/torture/pr98219-2.c | 59 ++
>  gcc/testsuite/gcc.target/i386/uintr-2.c  |  5 +-
>  gcc/testsuite/gcc.target/i386/uintr-3.c  |  4 +-
>  gcc/testsuite/gcc.target/i386/uintr-4.c  |  4 +-
>  gcc/testsuite/gcc.target/i386/uintr-5.c  |  2 +-
>  9 files changed, 223 insertions(+), 9 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/guality/pr98219-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/guality/pr98219-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/torture/pr98219-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/torture/pr98219-2.c
>
> diff --git a/gcc/config/i386/uintrintrin.h b/gcc/config/i386/uintrintrin.h
> index 991f6427971..4606caf8582 100644
> --- a/gcc/config/i386/uintrintrin.h
> +++ b/gcc/config/i386/uintrintrin.h
> @@ -38,9 +38,6 @@
>
>  struct __uintr_frame
>  {
> -  /* The position of the most significant bit set in user-interrupt
> - request register.  */
> -  unsigned long long uirrv;
>/* RIP of the interrupted user process.  */
>unsigned long long rip;
>/* RFLAGS of the interrupted user process.  */
> diff --git a/gcc/testsuite/gcc.dg/guality/pr98219-1.c 
> b/gcc/testsuite/gcc.dg/guality/pr98219-1.c
> new file mode 100644
> index 000..8d695080fd8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/guality/pr98219-1.c
> @@ -0,0 +1,48 @@
> +/* { dg-do run { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } */

No need for extra curly braces after !, so:

... && { ! ia32 } ...

here and in other tests.

> +/* { dg-options "-g -muintr -mgeneral-regs-only" } */
> +
> +#include 
> +
> +extern void exit (int);
> +
> +#define UIRRV  0x12345670
> +#define RIP0x12345671
> +#define RFLAGS 0x12345672
> +#define RSP0x12345673
> +
> +#define STRING(x)  XSTRING(x)
> +#define XSTRING(x) #x
> +#define ASMNAME(cname)  ASMNAME2 (__USER_LABEL_PREFIX__, cname)
> +#define ASMNAME2(prefix, cname) XSTRING (prefix) cname
> +
> +__attribute__((interrupt, used))
> +void
> +fn (struct __uintr_frame *frame, unsigned long long uirrv)

Please define and use

typedef unsigned int uword_t __attribute__ ((mode (__word__)));

as is the case in interrupt-3.c. Also, the convention is:

void
__attribute__((...))
fn (...)

Please also fix in other tests.

> +{
> +  if (UIRRV != uirrv)  /* BREAK */
> +__builtin_abort ();
> +  if (RIP != frame->rip)
> +__builtin_abort ();
> +  if (RFLAGS != frame->rflags)
> +__builtin_abort ();
> +  if (RSP != frame->rsp)
> +__builtin_abort ();
> +
> +  exit (0);
> +}
> +
> +int
> +main ()
> +{
> +  asm ("push   $" STRING (RSP) ";  \
> +   push$" STRING (RFLAGS) ";   \
> +   push$" STRING (RIP) ";  \
> +   push$" STRING (UIRRV) ";\
> +   jmp " ASMNAME ("fn"));
> +  return 0;
> +}
> +
> +/* { dg-final { gdb-test 22 "uirrv" "0x12345670" } } */
> +/* { dg-final { gdb-test 22 "frame->rip" "0x12345671" } } */
> +/* { dg-final { gdb-test 22 "frame->rflags" "0x12345672" } } */
> +/* { dg-final { gdb-test 22 "frame->rsp" "0x12345673" } } */
> diff --git a/gcc/testsuite/gcc.dg/guality/pr98219-2.c 
> b/gcc/testsuite/gcc.dg/guality/pr98219-2.c
> new file mode 100644
> index 000..c0e48c981de
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/guality/pr98219-2.c
> @@ -0,0 +1,63 @@
> +/* { dg-do run { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } */
> +/* { dg-options "-g -muintr -mgeneral-regs-only" } */
> +
> +#include 
> +
> +extern void exit (int);
> +typedef int aligned __attribute__((aligned(64)));
> +
> +#define UIRRV  0x12345670
> +#define RIP0x12345671
> +#d

Re: [PATCH 2/1] c++: Diagnose self-recursive satisfaction

2020-12-10 Thread Patrick Palka via Gcc-patches

On Thu, 10 Dec 2020, Patrick Palka wrote:

> This patch further extends the satisfaction_cache class to diagnose
> self-recursive satisfaction.
> 
> With this patch, a few more cmcstl2 tests fail at compile time due to apparent
> self-recursive satisfaction.  I didn't analyze these failures
> individually, but all errors contain view_interface::operator bool() in
> their template instantiation backtrace, and applying a workaround
> similar to PR97600#c3 to include/stl2/view/view_interface.hpp fixes all
> observed failures.

Whoops, I meant to say #c2 not #c3 here.  For the record, here's the
workaround in question:

diff --git a/include/stl2/view/view_interface.hpp 
b/include/stl2/view/view_interface.hpp
index ace9e983..dea29896 100644
--- a/include/stl2/view/view_interface.hpp
+++ b/include/stl2/view/view_interface.hpp
@@ -84,14 +84,15 @@ STL2_OPEN_NAMESPACE {
return begin(d) == end(d);
}
 
-   constexpr explicit operator bool()
+   template T>
+   constexpr explicit operator T()
// Distinct named concept to workaround 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82740
requires detail::CanEmpty {
return !__stl2::empty(derived());
}
// Distinct named concept to workaround 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82740
-   template // gcc_bugs_bugs_bugs
-   constexpr explicit operator bool() const {
+   template T, detail::CanEmpty = const D> // 
gcc_bugs_bugs_bugs
+   constexpr explicit operator T() const {
return !__stl2::empty(derived());
}
template

[committed] libstdc++: Remove redundant branches in countl_one and countr_one [PR 98226]

2020-12-10 Thread Jonathan Wakely via Gcc-patches

There's no need to explicitly check for the maximum value, because the
function we call handles it correctly anyway.

libstdc++-v3/ChangeLog:

PR libstdc++/98226
* include/std/bit (__countl_one, __countr_one): Remove redundant
branches.

Tested powerpc64le-linux. Committed to trunk.

commit 2ea62857a3fbdf091ba38cbb62e98dc76b198e2e
Author: Jonathan Wakely 
Date:   Thu Dec 10 21:57:42 2020

libstdc++: Remove redundant branches in countl_one and countr_one [PR 98226]

There's no need to explicitly check for the maximum value, because the
function we call handles it correctly anyway.

libstdc++-v3/ChangeLog:

PR libstdc++/98226
* include/std/bit (__countl_one, __countr_one): Remove redundant
branches.

diff --git a/libstdc++-v3/include/std/bit b/libstdc++-v3/include/std/bit
index 1d99c807c4a..6f47f89ab03 100644
--- a/libstdc++-v3/include/std/bit
+++ b/libstdc++-v3/include/std/bit
@@ -141,8 +141,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 constexpr int
 __countl_one(_Tp __x) noexcept
 {
-  if (__x == __gnu_cxx::__int_traits<_Tp>::__max)
-   return __gnu_cxx::__int_traits<_Tp>::__digits;
   return std::__countl_zero<_Tp>((_Tp)~__x);
 }
 
@@ -184,8 +182,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 constexpr int
 __countr_one(_Tp __x) noexcept
 {
-  if (__x == __gnu_cxx::__int_traits<_Tp>::__max)
-   return __gnu_cxx::__int_traits<_Tp>::__digits;
   return std::__countr_zero((_Tp)~__x);
 }

[PATCH 2/1] c++: Diagnose self-recursive satisfaction

2020-12-10 Thread Patrick Palka via Gcc-patches

This patch further extends the satisfaction_cache class to diagnose
self-recursive satisfaction.

With this patch, a few more cmcstl2 tests fail at compile time due to apparent
self-recursive satisfaction.  I didn't analyze these failures
individually, but all errors contain view_interface::operator bool() in
their template instantiation backtrace, and applying a workaround
similar to PR97600#c3 to include/stl2/view/view_interface.hpp fixes all
observed failures.  So I assume the fails are all due to the same latent
self-recursive satisfaction issue that r11-4584 fixed in libstdc++.

gcc/cp/ChangeLog:

* constraint.cc (sat_entry::evaluating): New member.
(satisfaction_cache::get): If entry->evaluating, diagnose
self-recursive satisfaction.  Otherwise, set entry->evaluating
if we're not reusing a cached satisfaction result.
(satisfaction_cache::save): Clear entry->evaluating.
(satisfy_atom): Set up diagnosing_failed_constraint before the
first call to get().

gcc/testsuite/ChangeLog:

PR c++/96840
* g++.dg/cpp2a/concepts-pr88395.C: Adjust to expect the
self-recursive satisfaction to get directly diagnosed.
* g++.dg/cpp2a/concepts-recursive-sat2.C: Likewise.
* g++.dg/cpp2a/concepts-recursive-sat4.C: New test.
---
 gcc/cp/constraint.cc  | 39 +++
 gcc/testsuite/g++.dg/cpp2a/concepts-pr88395.C |  8 ++--
 .../g++.dg/cpp2a/concepts-recursive-sat2.C|  6 +--
 .../g++.dg/cpp2a/concepts-recursive-sat4.C| 13 +++
 4 files changed, 49 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-recursive-sat4.C

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index b98befaf71b..d1bffd598ef 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -2428,6 +2428,11 @@ struct GTY((for_user)) sat_entry
  We don't always want to do so, in order to avoid emitting duplicate
  diagnostics in some cases.  */
   bool diagnose_instability;
+
+  /* True if we're in the middle of computing this satisfaction result.
+ Used to detect self-recursive satisfaction, during both quiet and
+ noisy satisfaction.  */
+  bool evaluating;
 };
 
 struct sat_hasher : ggc_ptr_hash
@@ -2581,6 +2586,7 @@ satisfaction_cache
   mapping, we set this flag (in satisfy_atom) only if substitution
   into its mapping previously failed.  */
entry->diagnose_instability = true;
+  entry->evaluating = false;
   *slot = entry;
 }
   else
@@ -2599,9 +2605,23 @@ satisfaction_cache::get ()
   if (!entry)
 return NULL_TREE;
 
-  if (info.noisy () || entry->maybe_unstable)
-/* We're recomputing the satisfaction result from scratch.  */
-return NULL_TREE;
+  if (entry->evaluating)
+{
+  /* If we get here, it means satisfaction is self-recursive.  */
+  gcc_checking_assert (!entry->result);
+  if (info.noisy ())
+   error_at (EXPR_LOCATION (ATOMIC_CONSTR_EXPR (entry->atom)),
+ "satisfaction of atomic constraint %qE depends on itself",
+ entry->atom);
+  return error_mark_node;
+}
+
+  if (info.noisy () || entry->maybe_unstable || !entry->result)
+{
+  /* We're computing the satisfaction result from scratch.  */
+  entry->evaluating = true;
+  return NULL_TREE;
+}
   else
 return entry->result;
 }
@@ -2616,6 +2636,9 @@ satisfaction_cache::save (tree result)
   if (!entry)
 return result;
 
+  gcc_checking_assert (entry->evaluating);
+  entry->evaluating = false;
+
   if (entry->result && result != entry->result)
 {
   if (info.quiet ())
@@ -2865,6 +2888,11 @@ static void diagnose_atomic_constraint (tree, tree, 
tree, subst_info);
 static tree
 satisfy_atom (tree t, tree args, sat_info info)
 {
+  /* In case there is a diagnostic, we want to establish the context
+ prior to printing errors.  If no errors occur, this context is
+ removed before returning.  */
+  diagnosing_failed_constraint failure (t, args, info.noisy ());
+
   satisfaction_cache cache (t, args, info);
   if (tree r = cache.get ())
 return r;
@@ -2872,11 +2900,6 @@ satisfy_atom (tree t, tree args, sat_info info)
   /* Perform substitution quietly.  */
   subst_info quiet (tf_none, NULL_TREE);
 
-  /* In case there is a diagnostic, we want to establish the context
- prior to printing errors.  If no errors occur, this context is
- removed before returning.  */
-  diagnosing_failed_constraint failure (t, args, info.noisy ());
-
   /* Instantiate the parameter mapping.  */
   tree map = tsubst_parameter_mapping (ATOMIC_CONSTR_MAP (t), args, quiet);
   if (map == error_mark_node)
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-pr88395.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-pr88395.C
index 1c25252d47b..e1792e19005 100644
--- a/gcc/testsuite/g++.dg/cpp2a/concepts-pr88395.C
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-pr88395.C
@@ -1,9 +1,9 @@
 // { dg-

Re: [PATCH v5] Practical Improvement to libgcc Complex Divide

2020-12-10 Thread Jakub Jelinek via Gcc-patches

On Thu, Dec 10, 2020 at 10:27:46AM -0600, Patrick McGehearty via Gcc-patches 
wrote:
> Thank you for your rapid feedback.
> I'll fix the various formatting issues (spaces in the wrong places
> and such as well as revise the Changelog magic) in the next submission.
> It will wait for Joseph's review to also make any changes he suggests.
> I'll also try to train myself to be more sensitive to gcc formatting
> conventions while proofreading.
> 
> I'm reluctant to change or use XALLOCAVEC instead of alloca as that
> is not the current style elsewhere in the routine.

If so, I can fix it incrementally.  But, at least fix up the formatting,
that was the reason I've mentioned XALLOCAVEC, because the alloca call
formatting was off.

> On the strcpy, strncpy, and memcpy question, given short length of
> the string being copied, I don't think it makes much difference.
> The two other copy operations in the file are memcpy.
> memcpy might be slightly better since it is generally more frequently
> seen and more likely that gcc has special case code to inline
> short fixed length memcpy as a few assignments. Even if both strncpy
> and memcpy are inlined, the memcpy code may be simpler as it does
> not need to be concerned with special treatment of nulls.
> I'll change the strncpy to memcpy.

Even if short, strncpy is a badly designed API that in 99% of cases just
shouldn't be used.  Either the string is shorter than the passed argument,
and then in most cases it completely uselessly zeroes the rest of the buffer
(the exception is security-sensitive code that needs to overwrite everything
that was there before), or it is the same length and then one should use
strcpy instead, or there is string truncation which doesn't zero terminate,
which is very rarely something one wants to do.

Jakub

[PATCH] aix: Fixinclude updates [PR98208]

2020-12-10 Thread Ilya Leoshkevich via Gcc-patches

Tested on gcc121 (x86_64 CentOS Linux 7).  Ok for master?



After 92648faa1cb2 ("aix: Fixinclude") make check-fixincludes began to
fail (at least on gcc121 machine).  Fix by updating fixincludes/tests
and rerunning genfixes.

fixincludes/ChangeLog:

2020-12-11  Ilya Leoshkevich  

* fixincl.x: Rerun genfixes.
* tests/base/sys/types.h: Add AIX_PHYSADR_T_CHECK.
---
 fixincludes/fixincl.x  | 4 ++--
 fixincludes/tests/base/sys/types.h | 5 +
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/fixincludes/fixincl.x b/fixincludes/fixincl.x
index 21439652bce..cc17edfba0b 100644
--- a/fixincludes/fixincl.x
+++ b/fixincludes/fixincl.x
@@ -2,11 +2,11 @@
  *
  * DO NOT EDIT THIS FILE   (fixincl.x)
  *
- * It has been AutoGen-ed  October 21, 2020 at 10:43:22 AM by AutoGen 5.18.16
+ * It has been AutoGen-ed  December  9, 2020 at 11:16:08 AM by AutoGen 5.18.16
  * From the definitionsinclhack.def
  * and the template file   fixincl
  */
-/* DO NOT SVN-MERGE THIS FILE, EITHER Wed Oct 21 10:43:22 EDT 2020
+/* DO NOT SVN-MERGE THIS FILE, EITHER Wed Dec  9 11:16:08 EST 2020
  *
  * You must regenerate it.  Use the ./genfixes script.
  *
diff --git a/fixincludes/tests/base/sys/types.h 
b/fixincludes/tests/base/sys/types.h
index 683b5e93ecd..a318f9b713b 100644
--- a/fixincludes/tests/base/sys/types.h
+++ b/fixincludes/tests/base/sys/types.h
@@ -9,6 +9,11 @@
 
 
 
+#if defined( AIX_PHYSADR_T_CHECK )
+typedef struct __physadr_s {
+#endif  /* AIX_PHYSADR_T_CHECK */
+
+
 #if defined( GNU_TYPES_CHECK )
 #if !defined(_GCC_PTRDIFF_T)
 #define _GCC_PTRDIFF_T
-- 
2.25.4

Re: [PATCH] Add a new pattern in 4-insn combine

2020-12-10 Thread HAO CHEN GUI via Gcc-patches


Segher,

    Gentle ping this:

https://gcc.gnu.org/pipermail/gcc-patches/2020-November/560573.html

On 30/11/2020 上午 11:08, HAO CHEN GUI wrote:

Hi,

  This patch adds a new pattern (combine 4 insns to 3 insns) in 4-insn 
combine. In the patch, newpat is split twice. The newpat, newi2pat and 
newi1pat replace i3, i2 and i1 respectively. The 4 to 3 combine is 
done at the end, where all former attempts have failed. In the 4-insn 
combine pre-check, zero and sign extends are added, as the patch is for 
issue 1 listed in pr65010.


  The attachments are the patch diff file and change log file.

  Bootstrapped and tested on powerpc64le, ARM and x86 with no 
regressions. Is this okay for trunk? Any recommendations? Thanks a lot.

[PATCH,rs6000] Fusion patterns for logical-logical

2020-12-10 Thread acsawdey--- via Gcc-patches

From: Aaron Sawdey 

This patch adds a new function to genfusion.pl to generate patterns for
logical-logical fusion. They are enabled by default for power10 and can
be disabled by -mno-power10-fusion-2logical or -mno-power10-fusion.

This patch builds on top of the load-cmpi patch posted earlier this week.

Bootstrap passed on ppc64le/power10, if regtests pass, ok for trunk?

gcc/ChangeLog
* config/rs6000/genfusion.pl (gen_2logical): New function to
generate patterns for logical-logical fusion.
* config/rs6000/fusion.md: Regenerated patterns.
* config/rs6000/rs6000-cpus.def: Add
OPTION_MASK_P10_FUSION_2LOGICAL.
* config/rs6000/rs6000.c (rs6000_option_override_internal):
Enable logical-logical fusion for p10.
* config/rs6000/rs6000.opt: Add -mpower10-fusion-2logical.
---
 gcc/config/rs6000/fusion.md   | 2176 +
 gcc/config/rs6000/genfusion.pl|   89 ++
 gcc/config/rs6000/rs6000-cpus.def |4 +-
 gcc/config/rs6000/rs6000.c|3 +
 gcc/config/rs6000/rs6000.opt  |4 +
 5 files changed, 2275 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index a4d3a6ae7f3..1ddbe7fe3d2 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -355,3 +355,2179 @@ (define_insn_and_split 
"*lbz_cmpldi_cr0_QI_GPR_CCUNS_zero"
(set_attr "cost" "8")
(set_attr "length" "8")])
 
+
+;; logical-logical fusion pattern generated by gen_2logical
+;; kind: scalar outer: and op and rtl and inv 0 comp 0
+;; inner: and op and rtl and inv 0 comp 0
+(define_insn "*fuse_and_and"
+  [(set (match_operand:GPR 3 "gpc_reg_operand" "=&r,0,1,r")
+(and:GPR (and:GPR (match_operand:GPR 0 "gpc_reg_operand" "r,r,r,r") 
(match_operand:GPR 1 "gpc_reg_operand" "%r,r,r,r")) (match_operand:GPR 2 
"gpc_reg_operand" "r,r,r,r")))
+   (clobber (match_scratch:GPR 4 "=X,X,X,r"))]
+  "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)"
+  "@
+   and %3,%1,%0\;and %3,%3,%2
+   and %0,%1,%0\;and %0,%0,%2
+   and %1,%1,%0\;and %1,%1,%2
+   and %4,%1,%0\;and %3,%4,%2"
+  [(set_attr "type" "logical")
+   (set_attr "cost" "6")
+   (set_attr "length" "8")])
+
+;; logical-logical fusion pattern generated by gen_2logical
+;; kind: scalar outer: and op and rtl and inv 0 comp 0
+;; inner: andc op andc rtl and inv 0 comp 1
+(define_insn "*fuse_andc_and"
+  [(set (match_operand:GPR 3 "gpc_reg_operand" "=&r,0,1,r")
+(and:GPR (and:GPR (not:GPR (match_operand:GPR 0 "gpc_reg_operand" 
"r,r,r,r")) (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) 
(match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))
+   (clobber (match_scratch:GPR 4 "=X,X,X,r"))]
+  "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)"
+  "@
+   andc %3,%1,%0\;and %3,%3,%2
+   andc %0,%1,%0\;and %0,%0,%2
+   andc %1,%1,%0\;and %1,%1,%2
+   andc %4,%1,%0\;and %3,%4,%2"
+  [(set_attr "type" "logical")
+   (set_attr "cost" "6")
+   (set_attr "length" "8")])
+
+;; logical-logical fusion pattern generated by gen_2logical
+;; kind: scalar outer: and op and rtl and inv 0 comp 0
+;; inner: eqv op eqv rtl xor inv 1 comp 0
+(define_insn "*fuse_eqv_and"
+  [(set (match_operand:GPR 3 "gpc_reg_operand" "=&r,0,1,r")
+(and:GPR (not:GPR (xor:GPR (match_operand:GPR 0 "gpc_reg_operand" 
"r,r,r,r") (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) 
(match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))
+   (clobber (match_scratch:GPR 4 "=X,X,X,r"))]
+  "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)"
+  "@
+   eqv %3,%1,%0\;and %3,%3,%2
+   eqv %0,%1,%0\;and %0,%0,%2
+   eqv %1,%1,%0\;and %1,%1,%2
+   eqv %4,%1,%0\;and %3,%4,%2"
+  [(set_attr "type" "logical")
+   (set_attr "cost" "6")
+   (set_attr "length" "8")])
+
+;; logical-logical fusion pattern generated by gen_2logical
+;; kind: scalar outer: and op and rtl and inv 0 comp 0
+;; inner: nand op nand rtl ior inv 0 comp 3
+(define_insn "*fuse_nand_and"
+  [(set (match_operand:GPR 3 "gpc_reg_operand" "=&r,0,1,r")
+(and:GPR (ior:GPR (not:GPR (match_operand:GPR 0 "gpc_reg_operand" 
"r,r,r,r")) (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) 
(match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))
+   (clobber (match_scratch:GPR 4 "=X,X,X,r"))]
+  "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)"
+  "@
+   nand %3,%1,%0\;and %3,%3,%2
+   nand %0,%1,%0\;and %0,%0,%2
+   nand %1,%1,%0\;and %1,%1,%2
+   nand %4,%1,%0\;and %3,%4,%2"
+  [(set_attr "type" "logical")
+   (set_attr "cost" "6")
+   (set_attr "length" "8")])
+
+;; logical-logical fusion pattern generated by gen_2logical
+;; kind: scalar outer: and op and rtl and inv 0 comp 0
+;; inner: nor op nor rtl and inv 0 comp 3
+(define_insn "*fuse_nor_and"
+  [(set (match_operand:GPR 3 "gpc_reg_operand" "=&r,0,1,r")
+(and:GPR (and:GPR (not:GPR (match_operand:GPR 0 "gpc_reg_operand" 
"r,r,r,r")) (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) 
(match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r

Re: [PATCH] gcc: handle double quotes in symbol name during stabstrings generation

2020-12-10 Thread Ian Lance Taylor via Gcc-patches

On Tue, Dec 8, 2020 at 5:15 AM CHIGOT, CLEMENT  wrote:
>
> Any news about this bug ? It's not urgent even if it's breaking gcc builds 
> with Go language, but I just want to know if you need any inputs/help from me.

I sent https://golang.org/cl/277232 to fix this problem.

Ian



> 
> From: CHIGOT, CLEMENT 
> Sent: Wednesday, December 2, 2020 5:14 PM
> To: Ian Lance Taylor 
> Cc: gcc-patches@gcc.gnu.org ; David Edelsohn 
> 
> Subject: Re: [PATCH] gcc: handle double quotes in symbol name during 
> stabstrings generation
>
> Hi Ian,
>
> Here is the test case.
> If you're compiling with -gstabs you should have a line looking like:
> .stabs  "type..struct{Type go.bug1.ObjectIdentifier;Value 
> [][]go.bug1.Extension{asn1:"set"}}:G(0,7)=xsStructType:",32,0,0,0
>
> As you can see the " around for "set" aren't escaped.
> I didn't try to reproduce it on linux/amd64, but I did on linux/ppc64le and I 
> don't think it's a ppc-only bug.
>
> Clément
> 
> From: Ian Lance Taylor 
> Sent: Wednesday, December 2, 2020 4:55 PM
> To: CHIGOT, CLEMENT 
> Cc: gcc-patches@gcc.gnu.org ; David Edelsohn 
> 
> Subject: Re: [PATCH] gcc: handle double quotes in symbol name during 
> stabstrings generation
>
> Caution! External email. Do not open attachments or click links, unless this 
> email comes from a known sender and you know the content is safe.
>
> On Wed, Dec 2, 2020 at 4:24 AM CHIGOT, CLEMENT  
> wrote:
> >
> > Since the new gccgo mangling scheme, libgo compilation is broken on AIX (or 
> > in Linux with -gstabs) because of a type symbol having a " in its name. 
> > I've made a patch (see attachment) in order to fix stabstring generation, 
> > because, IMO, it should be handled anyway.
> > However, it happens only once in the whole libgo so I don't know if this " 
> > is intended or not. The problematic type is there: 
> > https://eur01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2Fgolang%2Fgo%2Fblob%2Fmaster%2Fsrc%2Fcrypto%2Fx509%2Fx509.go%23L2674&data=04%7C01%7Cclement.chigot%40atos.net%7Ce85b8b57669c47db583508d896db2fc2%7C33440fc6b7c7412cbb730e70b0198d5a%7C0%7C0%7C637425215428486700%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=aB6diiR9Tgo3FTKOm0vmqVVJ%2B5JlCwd9oM5WeUaTaF4%3D&reserved=0.
> >  Other similar types don't trigger the bug though.
> >
> > I've a minimal test which might can be added if you wish, in Golang tests 
> > or in Gcc Go tests or in both ?
> >
> > If the patch is okay, could you please apply it for me ?
>
> Could you show me the small test case?  I don't think I understand the
> problem.  In DWARF I don't see any symbol names with quotation marks.
> I'm not yet sure that your patch is the right fix.  Thanks.
>
> Ian

Re: [PATCH] Fix up testcase.

2020-12-10 Thread Hongtao Liu via Gcc-patches

On Thu, Dec 10, 2020 at 8:52 PM Prathamesh Kulkarni
 wrote:
>
> On Wed, 9 Dec 2020 at 15:52, Hongtao Liu  wrote:
> >
> > On Wed, Dec 9, 2020 at 5:22 PM Prathamesh Kulkarni via Gcc-patches
> >  wrote:
> > >
> > > On Wed, 9 Dec 2020 at 00:29, sunil.k.pandey  wrote:
> > > >
> > > > On Linux/x86_64,
> > > >
> > > > 3a6e3ad38a17a03ee0139b49a0946e7b9ded1eb1 is the first bad commit
> > > > commit 3a6e3ad38a17a03ee0139b49a0946e7b9ded1eb1
> > > > Author: Prathamesh Kulkarni 
> > > > Date:   Tue Dec 8 14:30:04 2020 +0530
> > > >
> > > > gimple-isel: Fold x CMP y ? -1 : 0 to x CMP y [PR97872]
> > > >
> > > > caused
> > > >
> > > > FAIL: gcc.target/i386/pr78102.c scan-assembler-times pcmpeqq 3
> > > Hi,
> > > This is a known issue with the patch, and discussed here:
> > > https://gcc.gnu.org/pipermail/gcc/2020-December/234438.html
> > > I guess Hongtao will check in a fix for that soon.
> > >
> >
> > According to https://uops.info/table.html,
> > both pcmpeqq and pcmpeqd use only port 1, so i think there's no
> > performance difference between
> >
> > vpcmpeqq %xmm1, %xmm0, %xmm0
> > vpxor %xmm1, %xmm1, %xmm1
> > vpcmpeqq %xmm1, %xmm0, %xmm0
> >
> > and
> >
> > vpcmpeqq %xmm1, %xmm0, %xmm0
> > vpcmpeqd %xmm1, %xmm1, %xmm1
> > vpandn %xmm1, %xmm0, %xmm0
> >
> > So fix up testcase as below.
> >
> > gcc/testsuite
> >
> > * gcc.target/i386/i386/pr78102.c: Adjust testcase.
> >
> > 1 file changed, 1 insertion(+), 1 deletion(-)
> > gcc/testsuite/gcc.target/i386/pr78102.c | 2 +-
> >
> > modified   gcc/testsuite/gcc.target/i386/pr78102.c
> > @@ -1,7 +1,7 @@
> >  /* PR target/78102 */
> >  /* { dg-do compile } */
> >  /* { dg-options "-O2 -mno-sse4.2 -msse4.1" } */
> > -/* { dg-final { scan-assembler-times "pcmpeqq" 3 } } */
> > +/* { dg-final { scan-assembler-times "pcmpeq" 4 } } */
> >
> > Ok for trunk?
> Thanks for the fix!
> Just a small nit - Should it be "pcmpeqq" rather than "pcmpeq" in the
> dg-final line ?
>

4 pcmpeq would cover either 4 pcmpeqq or 3 pcmpeqq + 1 pcmpeqd (original situation)

> Thanks,
> Prathamesh
> >
> >
> >
> > --
> > BR,
> > Hongtao



-- 
BR,
Hongtao

[PATCH] c++: missing SFINAE with pointer subtraction [PR78173]

2020-12-10 Thread Patrick Palka via Gcc-patches

This fixes a missed SFINAE when subtracting pointers to an incomplete
type.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?

gcc/cp/ChangeLog:

PR c++/78173
* typeck.c (pointer_diff): Use complete_type_or_maybe_complain
instead of complete_type_or_else.

gcc/testsuite/ChangeLog:

PR c++/78173
* g++.dg/cpp2a/concepts-pr78173.C: New test.
---
 gcc/cp/typeck.c   | 2 +-
 gcc/testsuite/g++.dg/cpp2a/concepts-pr78173.C | 9 +
 2 files changed, 10 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-pr78173.C

diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
index dd84674a5e6..c41457bcce8 100644
--- a/gcc/cp/typeck.c
+++ b/gcc/cp/typeck.c
@@ -5985,7 +5985,7 @@ pointer_diff (location_t loc, tree op0, tree op1, tree 
ptrtype,
   tree restype = ptrdiff_type_node;
   tree target_type = TREE_TYPE (ptrtype);
 
-  if (!complete_type_or_else (target_type, NULL_TREE))
+  if (!complete_type_or_maybe_complain (target_type, NULL_TREE, complain))
 return error_mark_node;
 
   if (VOID_TYPE_P (target_type))
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-pr78173.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-pr78173.C
new file mode 100644
index 000..50f561abcce
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-pr78173.C
@@ -0,0 +1,9 @@
+// PR c++/78173
+// { dg-do compile { target c++20 } }
+
+template 
+concept CanDifference = requires(T x, T y) {
+x - y;
+};
+
+static_assert(!CanDifference);
-- 
2.29.2.540.g3cf59784d4

Re: [PATCH] c++: missing SFINAE with pointer subtraction [PR78173]

2020-12-10 Thread Jason Merrill via Gcc-patches

Ok.

On Fri, Dec 11, 2020, 12:45 AM Patrick Palka  wrote:

> This fixes a missed SFINAE when subtracting pointers to an incomplete
> type.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
> trunk?
>
> gcc/cp/ChangeLog:
>
> PR c++/78173
> * typeck.c (pointer_diff): Use complete_type_or_maybe_complain
> instead of complete_type_or_else.
>
> gcc/testsuite/ChangeLog:
>
> PR c++/78173
> * g++.dg/cpp2a/concepts-pr78173.C: New test.
> ---
>  gcc/cp/typeck.c   | 2 +-
>  gcc/testsuite/g++.dg/cpp2a/concepts-pr78173.C | 9 +
>  2 files changed, 10 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-pr78173.C
>
> diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
> index dd84674a5e6..c41457bcce8 100644
> --- a/gcc/cp/typeck.c
> +++ b/gcc/cp/typeck.c
> @@ -5985,7 +5985,7 @@ pointer_diff (location_t loc, tree op0, tree op1,
> tree ptrtype,
>tree restype = ptrdiff_type_node;
>tree target_type = TREE_TYPE (ptrtype);
>
> -  if (!complete_type_or_else (target_type, NULL_TREE))
> +  if (!complete_type_or_maybe_complain (target_type, NULL_TREE, complain))
>  return error_mark_node;
>
>if (VOID_TYPE_P (target_type))
> diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-pr78173.C
> b/gcc/testsuite/g++.dg/cpp2a/concepts-pr78173.C
> new file mode 100644
> index 000..50f561abcce
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp2a/concepts-pr78173.C
> @@ -0,0 +1,9 @@
> +// PR c++/78173
> +// { dg-do compile { target c++20 } }
> +
> +template 
> +concept CanDifference = requires(T x, T y) {
> +x - y;
> +};
> +
> +static_assert(!CanDifference);
> --
> 2.29.2.540.g3cf59784d4
>
>

Re: [PATCH] Fix up testcase.

2020-12-10 Thread Prathamesh Kulkarni via Gcc-patches

On Fri, 11 Dec 2020 at 10:46, Hongtao Liu  wrote:
>
> On Thu, Dec 10, 2020 at 8:52 PM Prathamesh Kulkarni
>  wrote:
> >
> > On Wed, 9 Dec 2020 at 15:52, Hongtao Liu  wrote:
> > >
> > > On Wed, Dec 9, 2020 at 5:22 PM Prathamesh Kulkarni via Gcc-patches
> > >  wrote:
> > > >
> > > > On Wed, 9 Dec 2020 at 00:29, sunil.k.pandey  
> > > > wrote:
> > > > >
> > > > > On Linux/x86_64,
> > > > >
> > > > > 3a6e3ad38a17a03ee0139b49a0946e7b9ded1eb1 is the first bad commit
> > > > > commit 3a6e3ad38a17a03ee0139b49a0946e7b9ded1eb1
> > > > > Author: Prathamesh Kulkarni 
> > > > > Date:   Tue Dec 8 14:30:04 2020 +0530
> > > > >
> > > > > gimple-isel: Fold x CMP y ? -1 : 0 to x CMP y [PR97872]
> > > > >
> > > > > caused
> > > > >
> > > > > FAIL: gcc.target/i386/pr78102.c scan-assembler-times pcmpeqq 3
> > > > Hi,
> > > > This is a known issue with the patch, and discussed here:
> > > > https://gcc.gnu.org/pipermail/gcc/2020-December/234438.html
> > > > I guess Hongtao will check in a fix for that soon.
> > > >
> > >
> > > According to https://uops.info/table.html,
> > > both pcmpeqq and pcmpeqd use only port 1, so i think there's no
> > > performance difference between
> > >
> > > vpcmpeqq %xmm1, %xmm0, %xmm0
> > > vpxor %xmm1, %xmm1, %xmm1
> > > vpcmpeqq %xmm1, %xmm0, %xmm0
> > >
> > > and
> > >
> > > vpcmpeqq %xmm1, %xmm0, %xmm0
> > > vpcmpeqd %xmm1, %xmm1, %xmm1
> > > vpandn %xmm1, %xmm0, %xmm0
> > >
> > > So fix up testcase as below.
> > >
> > > gcc/testsuite
> > >
> > > * gcc.target/i386/i386/pr78102.c: Adjust testcase.
> > >
> > > 1 file changed, 1 insertion(+), 1 deletion(-)
> > > gcc/testsuite/gcc.target/i386/pr78102.c | 2 +-
> > >
> > > modified   gcc/testsuite/gcc.target/i386/pr78102.c
> > > @@ -1,7 +1,7 @@
> > >  /* PR target/78102 */
> > >  /* { dg-do compile } */
> > >  /* { dg-options "-O2 -mno-sse4.2 -msse4.1" } */
> > > -/* { dg-final { scan-assembler-times "pcmpeqq" 3 } } */
> > > +/* { dg-final { scan-assembler-times "pcmpeq" 4 } } */
> > >
> > > Ok for trunk?
> > Thanks for the fix!
> > Just a small nit - Should it be "pcmpeqq" rather than "pcmpeq" in the
> > dg-final line ?
> >
>
> 4 pcmpeq would cover both 4 pcmpeqq or 3 pcmpeqq + 1 pcmpeqd(original 
> situation)
Ah indeed, thanks for pointing out.

Regards,
Prathamesh
>
> > Thanks,
> > Prathamesh
> > >
> > >
> > >
> > > --
> > > BR,
> > > Hongtao
>
>
>
> --
> BR,
> Hongtao

[PATCH] Complete _GLIBCXX_DEBUG constexpr compatibility

2020-12-10 Thread François Dumont via Gcc-patches


Hi

I'd like to commit this small fix to complete _GLIBCXX_DEBUG constexpr 
compatibility. There are still 2 macros not using __glibcxx_assert_1.


It fixes the generated diagnostic to have the __failed_assertion rather 
than a message saying that _Error_formatter::_M_error is not constexpr.


    libstdc++: Fix _GLIBCXX_DEBUG mode constexpr compatibility

    The __glibcxx_check_can_[increment|decrement]_range macros are 
using the
    _GLIBCXX_DEBUG_VERIFY_COND_AT macro which is not constexpr 
compliant and will produce nasty
    diagnostics rather than the std::__failed_assertion dedicated to 
constexpr. Replace it with

    correct _GLIBCXX_DEBUG_VERIFY_AT_F.

    libstdc++-v3/ChangeLog:
    * include/debug/macros.h 
(__glibcxx_check_can_increment_range): Replace
    _GLIBCXX_DEBUG_VERIFY_COND_AT usage with 
_GLIBCXX_DEBUG_VERIFY_AT_F.

    (__glibcxx_check_can_decrement_range): Likewise.
    * testsuite/25_algorithms/copy_backward/constexpr.cc 
(test03): New.
    * testsuite/25_algorithms/copy/debug/constexpr_neg.cc: New 
test.
    * 
testsuite/25_algorithms/copy_backward/debug/constexpr_neg.cc: New test.

    * testsuite/25_algorithms/equal/constexpr_neg.cc: New test.
    * testsuite/25_algorithms/equal/debug/constexpr_neg.cc: New 
test.


Tested under Linux x86_64 normal and debug modes.

Ok to commit ?

François

diff --git a/libstdc++-v3/include/debug/macros.h b/libstdc++-v3/include/debug/macros.h
index ef4c76c747a..a69310d9a12 100644
--- a/libstdc++-v3/include/debug/macros.h
+++ b/libstdc++-v3/include/debug/macros.h
@@ -98,13 +98,13 @@ _GLIBCXX_DEBUG_VERIFY(__gnu_debug::__can_advance(_First, _Size),	\
   do	\
   {	\
 typename __gnu_debug::_Distance_traits<__decltype(_First1)>::__type __dist;\
-_GLIBCXX_DEBUG_VERIFY_COND_AT(	\
+_GLIBCXX_DEBUG_VERIFY_AT_F(		\
 			__gnu_debug::__valid_range(_First1, _Last1, __dist),\
 			_M_message(__gnu_debug::__msg_valid_range)	\
 			._M_iterator(_First1, #_First1)			\
 			._M_iterator(_Last1, #_Last1),			\
 			__FILE__,__LINE__,__PRETTY_FUNCTION__);		\
-_GLIBCXX_DEBUG_VERIFY_COND_AT(	\
+_GLIBCXX_DEBUG_VERIFY_AT_F(		\
 			__gnu_debug::__can_advance(_First2, __dist.first),\
 			_M_message(__gnu_debug::__msg_iter_subscript_oob)\
 			._M_iterator(_First2, #_First2)			\
@@ -116,13 +116,13 @@ _GLIBCXX_DEBUG_VERIFY(__gnu_debug::__can_advance(_First, _Size),	\
   do	\
   {	\
 typename __gnu_debug::_Distance_traits<__decltype(_First1)>::__type __dist;\
-_GLIBCXX_DEBUG_VERIFY_COND_AT(	\
+_GLIBCXX_DEBUG_VERIFY_AT_F(		\
 			__gnu_debug::__valid_range(_First1, _Last1, __dist),\
 			_M_message(__gnu_debug::__msg_valid_range)	\
 			._M_iterator(_First1, #_First1)			\
 			._M_iterator(_Last1, #_Last1),			\
 			__FILE__,__LINE__,__PRETTY_FUNCTION__);		\
-_GLIBCXX_DEBUG_VERIFY_COND_AT(	\
+_GLIBCXX_DEBUG_VERIFY_AT_F(		\
 			__gnu_debug::__can_advance(_First2, -__dist.first),\
 			_M_message(__gnu_debug::__msg_iter_subscript_oob)\
 			._M_iterator(_First2, #_First2)			\
diff --git a/libstdc++-v3/testsuite/25_algorithms/copy/debug/constexpr_neg.cc b/libstdc++-v3/testsuite/25_algorithms/copy/debug/constexpr_neg.cc
new file mode 100644
index 000..fc66a2f8fca
--- /dev/null
+++ b/libstdc++-v3/testsuite/25_algorithms/copy/debug/constexpr_neg.cc
@@ -0,0 +1,53 @@
+// Copyright (C) 2020 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// .
+
+// { dg-options "-std=gnu++2a" }
+// { dg-do compile { target c++2a xfail *-*-* } }
+// { dg-require-debug-mode ""  }
+
+#include 
+#include 
+
+constexpr bool
+test1()
+{
+  constexpr std::array ca0{{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}};
+  std::array ma0{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}};
+
+  const auto out6 = std::copy(ca0.begin() + 8, ca0.begin(), ma0.begin() + 2);
+
+  return out6 == ma0.begin() + 10;
+}
+
+static_assert(test1()); // { dg-error "non-constant condition" }
+
+constexpr bool
+test2()
+{
+  constexpr std::array ca0{{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}};
+  std::array ma0{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}};
+
+  const auto out6 = std::copy(ca0.begin(), ca0.begin() + 8, ma0.begin() + 10);
+
+  return out6 == ma0.begin() + 18;
+}
+
+static_assert(test2()); // { dg-error

Re: [PATCH] aix: Fixinclude updates [PR98208]

2020-12-10 Thread Richard Biener via Gcc-patches

On Fri, Dec 11, 2020 at 2:01 AM Ilya Leoshkevich via Gcc-patches
 wrote:
>
> Tested on gcc121 (x86_64 CentOS Linux 7).  Ok for master?

OK

>
>
> After 92648faa1cb2 ("aix: Fixinclude") make check-fixincludes began to
> fail (at least on gcc121 machine).  Fix by updating fixincludes/tests
> and rerunning genfixes.
>
> fixincludes/ChangeLog:
>
> 2020-12-11  Ilya Leoshkevich  
>
> * fixincl.x: Rerun genfixes.
> * tests/base/sys/types.h: Add AIX_PHYSADR_T_CHECK.
> ---
>  fixincludes/fixincl.x  | 4 ++--
>  fixincludes/tests/base/sys/types.h | 5 +
>  2 files changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/fixincludes/fixincl.x b/fixincludes/fixincl.x
> index 21439652bce..cc17edfba0b 100644
> --- a/fixincludes/fixincl.x
> +++ b/fixincludes/fixincl.x
> @@ -2,11 +2,11 @@
>   *
>   * DO NOT EDIT THIS FILE   (fixincl.x)
>   *
> - * It has been AutoGen-ed  October 21, 2020 at 10:43:22 AM by AutoGen 5.18.16
> + * It has been AutoGen-ed  December  9, 2020 at 11:16:08 AM by AutoGen 
> 5.18.16
>   * From the definitionsinclhack.def
>   * and the template file   fixincl
>   */
> -/* DO NOT SVN-MERGE THIS FILE, EITHER Wed Oct 21 10:43:22 EDT 2020
> +/* DO NOT SVN-MERGE THIS FILE, EITHER Wed Dec  9 11:16:08 EST 2020
>   *
>   * You must regenerate it.  Use the ./genfixes script.
>   *
> diff --git a/fixincludes/tests/base/sys/types.h 
> b/fixincludes/tests/base/sys/types.h
> index 683b5e93ecd..a318f9b713b 100644
> --- a/fixincludes/tests/base/sys/types.h
> +++ b/fixincludes/tests/base/sys/types.h
> @@ -9,6 +9,11 @@
>
>
>
> +#if defined( AIX_PHYSADR_T_CHECK )
> +typedef struct __physadr_s {
> +#endif  /* AIX_PHYSADR_T_CHECK */
> +
> +
>  #if defined( GNU_TYPES_CHECK )
>  #if !defined(_GCC_PTRDIFF_T)
>  #define _GCC_PTRDIFF_T
> --
> 2.25.4
>

Re: Optimize combination of comparisons to dec+compare

2020-12-10 Thread Richard Biener via Gcc-patches

On Thu, Dec 10, 2020 at 1:52 AM Eugene Rozenfeld via Gcc-patches
 wrote:
>
> This patch adds a pattern for optimizing
> x < y || x == XXX_MIN to x <= y-1
> if y is an integer with TYPE_OVERFLOW_WRAPS.

Do we already handle x < y || x <= CST to x <= y - CST?
That is, the XXX_MIN case is just a special-case of generic
anti-range testing?  For anti-range testing with signed types
we pun to unsigned when possible.

> This fixes pr96674.
>
> Tested on x86_64-pc-linux-gnu.
>
> For this function
>
> bool f(unsigned a, unsigned b)
> {
> return (b == 0) | (a < b);
> }
>
> the code without the patch is
>
> test   esi,esi
> sete   al
> cmpesi,edi
> seta   dl
> or eax,edx
> ret
>
> the code with the patch is
>
> subesi,0x1
> cmpesi,edi
> setae  al
> ret
>
> Eugene
>
> gcc/
> PR tree-optimization/96674
> * match.pd: New pattern x < y || x == XXX_MIN --> x <= y - 1
>
> gcc/testsuite
> * gcc.dg/pr96674.c: New test.
>

Re: [RFC] [avr] Toolchain Integration for Testsuite Execution (avr cc0 to mode_cc0 conversion)

2020-12-10 Thread Richard Biener via Gcc-patches

On Thu, Dec 10, 2020 at 6:42 AM Dimitar Dimitrov  wrote:
>
> On сряда, 9 декември 2020 г. 15:12:49 EET abebeos via Gcc-patches wrote:
> > Essence:
> >
> > I need a confirmation that the testsuite setup as presented in:
> >
> > https://github.com/abebeos/avr-gnu
> >
> > works fine.
> >
> > The problem with the avr target is that the testsuite cannot be run easily,
> > mainly because of the need for a special simulated-target setup, which does
> > not work for avr as documented. This led developers to a dead-end with
> > their non-cc0-avr-backends (the non-cc0 backend is needed thus avr is not
> > dropped from gcc11).
> >
> > I integrated a toolchain/testsetup to be able to run the gcc testsuite
> > against a simulated avr target.
> >
> > I then used this toolchain to test 2 different existent
> > non-cc0-avr-backends (from pipcet and saaadhu, both github).
> >
> > The result is that saaadhu's backend seems to be working 100%. It has
> > identical testsuite results with the existing (but deprecated) cc0-backend,
> > which means that it can be used "as-is" for inclusion in gcc11.
> >
> > Please note that I did this work in context of a bounty @ bountysouce, more
> > information within the issue:
> >
> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92729#c35
> Hi,
>
> I tested the trees you have given with my own AVR test setup [1]. I confirm
> your results:
>   - saaadhu's tree does not introduce any regressions.
>   - pipcet's tree has 142 gcc and 299 g++ regressions (although many of them
> are duplicates, e.g. same test case with different optimization levels).
>
> It's a bit awkward to copy gcc/config/avr into a mainline tree. Looking at
> their github history, both authors made some small changes in other areas. I
> would have preferred to cherry-pick or apply patches.
>
> =
> baseline beb9afcaf1466996a301c778596c5df209e7913c
>
> === gcc Summary ===
>
> # of expected passes87504
> # of unexpected failures1105
> # of unexpected successes   15
> # of expected failures  581
> # of unresolved testcases   16786
> # of unsupported tests  5370
>
> === g++ Summary ===
>
> # of expected passes140663
> # of unexpected failures7932
> # of unexpected successes   21
> # of expected failures  620
> # of unresolved testcases   8603
> # of unsupported tests  11305
>
> =
> pipcet/avr-ccmode
>
> === gcc Summary ===
>
> # of expected passes87463
> # of unexpected failures1221
> # of unexpected successes   15
> # of expected failures  581
> # of unresolved testcases   16799
> # of unsupported tests  5359
>
> === g++ Summary ===
>
> # of expected passes140529
> # of unexpected failures8205
> # of unexpected successes   21
> # of expected failures  620
> # of unresolved testcases   8607
> # of unsupported tests  11301
>
> =
> saadhu/avr-cc0
> === gcc Summary ===
>
> # of expected passes87504
> # of unexpected failures1105
> # of unexpected successes   15
> # of expected failures  581
> # of unresolved testcases   16786
> # of unsupported tests  5370
>
> === g++ Summary ===
>
> # of expected passes140663
> # of unexpected failures7932
> # of unexpected successes   21
> # of expected failures  620
> # of unresolved testcases   8603
> # of unsupported tests  11305
>
> On a side note, I build and test AVR backend in mainline everyday. If there is
> interest from AVR maintainers I can post daily results to gcc-testresults@
> mailing list.

I'd appreciate such postings, not necessarily daily (if there are no changes).
Also (as usual) for active release branches (say once a week or biweekly
or even monthly).

Without any hints on gcc-testresults it's hard to assess whether ports
pass some very basic functionality criteria (AVR is neither primary nor
secondary but shipping totally broken ports isn't in our interest).

Thanks a lot,
Richard.

> Regards,
> Dimitar
>
> [1] https://github.com/dinuxbg/gnupru/blob/master/testing/buildbot-avr.sh
>
>

Re: introduce overridable clear_cache emitter

2020-12-10 Thread Alexandre Oliva

On Dec  5, 2020, Jakub Jelinek  wrote:

> On Sat, Dec 05, 2020 at 06:01:59PM -0300, Alexandre Oliva wrote:
>> On Dec  5, 2020, Andreas Schwab  wrote:
>> 
>> > ../../../../libffi/src/aarch64/ffi.c: In function 'ffi_prep_closure_loc':
>> > ../../../../libffi/src/aarch64/ffi.c:67:3: internal compiler error: in 
>> > emit_library_call_value_1, at calls.c:5300
>> >67 |   __builtin___clear_cache (start, end);
>> >   |   ^~~~
>> 
>> Is this still aarch64-linux-gnu -mabi=ilp32?  I'm afraid I couldn't
>> duplicate this error using a cross compiler (without binutils, but with
>> HAVE_AS_MABI_OPTION forced enabled), and many variants of a manually
>> minimized ffi.c (to build without libc):

> See PR98147, I've put there an untested patch, but I have no way to test it.

Thanks for the fix.  I can't imagine why that wouldn't have been hit in
my reduced-build scenario, but once I saw your patch, it was pretty
obvious that that was it, and I haven't investigated any further.

-- 
Alexandre Oliva, happy hacker  https://FSFLA.org/blogs/lxo/
   Free Software Activist GNU Toolchain Engineer
Vim, Vi, Voltei pro Emacs -- GNUlius Caesar

[PATCH] expansion: Sign or zero extend on MEM_REF stores into SUBREG with SUBREG_PROMOTED_VAR_P [PR98190]

2020-12-10 Thread Jakub Jelinek via Gcc-patches

Hi!

Some targets decide to promote certain scalar variables to a wider mode,
so their DECL_RTL is a SUBREG with SUBREG_PROMOTED_VAR_P.
When storing to such vars, store_expr takes care of sign or zero extending,
but if we store e.g. through MEM_REF into them, no sign or zero extension
happens and that leads to wrong-code e.g. on the following testcase on
aarch64-linux.

The following patch uses store_expr if we overwrite all the bits and it is
not reversed storage order, i.e. something that store_expr handles normally,
and otherwise (if the most significant bit is (or for pdp11 might be, but
pdp11 doesn't promote) being modified), the code extends manually.

I've bootstrapped/regtested an earlier version of this patch on
{x86_64,i686,aarch64,armv7hl,powerpc64le}-linux; that version had
instead an assert that bitpos is 0 and bitsize GET_MODE_BITSIZE of to_rtx.
That resulted in
g++.dg/warn/Wstrict-aliasing-bogus-char-1.C
gcc.dg/pr87273.c
gcc.dg/torture/pr91656-1.c
gcc.dg/tree-ssa/pr92085-2.c
gcc.dg/tree-ssa/pr94703.c
regressions on powerpc64le and
gcc.c-torture/execute/pr93213.c
on powerpc64le and aarch64, but all of those succeed now with this version.

Ok for trunk?

2020-12-10  Jakub Jelinek  

PR middle-end/98190
* expr.c (expand_assignment): If to_rtx is a promoted SUBREG,
ensure sign or zero extension when the most significant bit has
been overwritten, either through use of store_expr or by extending
manually.

* gcc.dg/pr98190.c: New test.

--- gcc/expr.c.jj   2020-12-09 23:50:41.385776978 +0100
+++ gcc/expr.c  2020-12-10 09:35:56.231058928 +0100
@@ -5451,6 +5451,43 @@ expand_assignment (tree to, tree from, b
   mode1, to_rtx, to, from,
   reversep))
result = NULL;
+ else if (SUBREG_P (to_rtx)
+  && SUBREG_PROMOTED_VAR_P (to_rtx))
+   {
+ /* If to_rtx is a promoted subreg, we need to zero or sign
+extend the value afterwards.  */
+ if (TREE_CODE (to) == MEM_REF
+ && !REF_REVERSE_STORAGE_ORDER (to)
+ && known_eq (bitpos, 0)
+ && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (to_rtx
+   result = store_expr (from, to_rtx, 0, nontemporal, false);
+ else
+   {
+ rtx to_rtx1 = to_rtx;
+ /* Optimize by checking if the overwritten bits
+include the most significant bit.  */
+ if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN
+ || (BYTES_BIG_ENDIAN && known_eq (bitpos, 0))
+ || (!BYTES_BIG_ENDIAN
+ && known_eq (bitpos + bitsize,
+  GET_MODE_BITSIZE (GET_MODE (to_rtx)
+   to_rtx1 = lowpart_subreg (subreg_unpromoted_mode (to_rtx),
+ SUBREG_REG (to_rtx),
+ subreg_promoted_mode (to_rtx));
+ result = store_field (to_rtx1, bitsize, bitpos,
+   bitregion_start, bitregion_end,
+   mode1, from, get_alias_set (to),
+   nontemporal, reversep);
+ if (to_rtx1 != to_rtx)
+   {
+ to_rtx1
+   = convert_to_mode (subreg_promoted_mode (to_rtx),
+  to_rtx1,
+  SUBREG_PROMOTED_SIGN (to_rtx));
+ emit_move_insn (SUBREG_REG (to_rtx), to_rtx1);
+   }
+   }
+   }
  else
result = store_field (to_rtx, bitsize, bitpos,
  bitregion_start, bitregion_end,
--- gcc/testsuite/gcc.dg/pr98190.c.jj   2020-12-10 09:08:54.838216477 +0100
+++ gcc/testsuite/gcc.dg/pr98190.c  2020-12-10 09:08:54.838216477 +0100
@@ -0,0 +1,33 @@
+/* PR middle-end/98190 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+static int __attribute__((noipa))
+foo (const char *p, const char *q, const int len)
+{
+  for (int i = 0; i < len; p++, q++, i++)
+{
+  int equal;
+  _Bool x, y;
+  __builtin_memcpy ((char *) &x, p, sizeof x);
+  __builtin_memcpy ((char *) &y, q, sizeof y);
+  equal = (x == y);
+  if (equal <= 0)
+   return equal;
+}
+  return 1;
+}
+
+int
+main ()
+{
+  const _Bool buf[4] = { 1, 0, 0, 0 };
+#ifdef __aarch64__
+  register long x4 asm ("x4") = 0xdeadbeefULL;
+  register long x5 asm ("x5") = 0xcafebabeULL;
+  asm volatile (""::"r" (x4), "r" (x5));
+#endif
+  if (foo ((char *) &buf[0], (char *) &buf[0], 1) != 1)
+__builtin_abort ();
+  return 0;
+}

Jakub

Re: [PATCH V2] RISC-V: Explicitly call python when using multilib generator

2020-12-10 Thread Simon Cook

Hi Kito,

Thanks for reviewing this. Since I don't have commit access, could you
commit this for me.

On 10/12/2020 02:32, Kito Cheng wrote:
> Hi Simon:
> 
> V2 version is LGTM, thanks!
>

[PATCH] dojump: Optimize a == a or a != a [PR98169]

2020-12-10 Thread Jakub Jelinek via Gcc-patches

Hi!

If the backend doesn't have floating point EQ or NE comparison, dojump.c
splits it into ORDERED && UNEQ or UNORDERED || LTGT.  If both comparison
operands are the same, we know the result of the second comparison though,
a == a is equivalent to a ord a and a != a is equivalent to a unord a,
and thus can just use ORDERED or UNORDERED.

On the testcase, this changes f1:
-   ucomiss %xmm0, %xmm0
-   movl$1, %eax
-   jp  .L3
-   jne .L3
-   ret
-   .p2align 4,,10
-   .p2align 3
-.L3:
xorl%eax, %eax
+   ucomiss %xmm0, %xmm0
+   setnp   %al
and f3:
-   ucomisd %xmm0, %xmm0
-   movl$1, %eax
-   jp  .L8
-   jne .L8
-   ret
-   .p2align 4,,10
-   .p2align 3
-.L8:
xorl%eax, %eax
+   ucomisd %xmm0, %xmm0
+   setnp   %al
while keeping the same code for f2 and f4.

Bootstrapped/regtested on {x86_64,i686,powerpc64le,aarch64,armv7hl}-linux,
ok for trunk?

2020-12-10  Jakub Jelinek  

PR tree-optimization/98169
* dojump.c (do_compare_rtx_and_jump): Don't split self-EQ/NE
comparisons, just use ORDERED or UNORDERED.

* gcc.target/i386/pr98169.c: New test.

--- gcc/dojump.c.jj 2020-01-27 13:20:40.0 +0100
+++ gcc/dojump.c2020-12-09 11:56:12.484150856 +0100
@@ -1114,7 +1114,7 @@ do_compare_rtx_and_jump (rtx op0, rtx op
   /* ... or if there is no libcall for it.  */
   || code_to_optab (code) == unknown_optab))
 {
- enum rtx_code first_code;
+ enum rtx_code first_code, orig_code = code;
  bool and_them = split_comparison (code, mode, &first_code, &code);
 
  /* If there are no NaNs, the first comparison should always fall
@@ -1122,6 +1122,12 @@ do_compare_rtx_and_jump (rtx op0, rtx op
  if (!HONOR_NANS (mode))
gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
 
+ else if ((orig_code == EQ || orig_code == NE)
+  && rtx_equal_p (op0, op1))
+   /* Self-comparisons x == x or x != x can be optimized into
+  just x ord x or x nord x.  */
+   code = orig_code == EQ ? ORDERED : UNORDERED;
+
  else
{
  profile_probability cprob
--- gcc/testsuite/gcc.target/i386/pr98169.c.jj  2020-12-09 12:01:23.055686732 
+0100
+++ gcc/testsuite/gcc.target/i386/pr98169.c 2020-12-09 12:02:00.450269645 
+0100
@@ -0,0 +1,29 @@
+/* PR tree-optimization/98169 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-finite-math-only" } */
+/* { dg-final { scan-assembler-times "\tsetn\?p\t" 4 } } */
+/* { dg-final { scan-assembler-not "\tjn\?\[ep]\t" } } */
+
+int
+f1 (float a)
+{
+  return a == a;
+}
+
+int
+f2 (float a)
+{
+  return !__builtin_isnanf (a);
+}
+
+int
+f3 (double a)
+{
+  return a == a;
+}
+
+int
+f4 (double a)
+{
+  return !__builtin_isnan (a);
+}

Jakub

Re: [PATCH V2] RISC-V: Explicitly call python when using multilib generator

2020-12-10 Thread Kito Cheng via Gcc-patches

Hi Simon:

Committed, thanks :)

On Thu, Dec 10, 2020 at 4:56 PM Simon Cook  wrote:
>
> Hi Kito,
>
> Thanks for reviewing this. Since I don't have commit access, could you
> commit this for me.
>
> On 10/12/2020 02:32, Kito Cheng wrote:
> > Hi Simon:
> >
> > V2 version is LGTM, thanks!
> >

[PATCH] varasm: Reject soft frame or arg pointer registers for register vars [PR92469]

2020-12-10 Thread Jakub Jelinek via Gcc-patches

Hi!

The following patch rejects the frame and argp registers (unless they are equal
to the hard frame pointer register) as local or global register vars.
These are just internal implementation details that are later eliminated into
the hard frame pointer or stack pointer, and using them as register variables
leads to numerous ICEs.

Bootstrapped/regtested on {x86_64,i686,aarch64,armv7hl,powerpc64le}-linux,
ok for trunk?

2020-12-10  Jakub Jelinek  

PR target/92469
* varasm.c (make_decl_rtl): Reject asm vars for frame and argp
if they are different from hard frame pointer.

* gcc.target/i386/pr92469.c: New test.
* gcc.target/i386/pr79804.c: Adjust expected diagnostics.
* gcc.target/i386/pr88178.c: Expect an error.

--- gcc/varasm.c.jj 2020-12-04 10:53:56.314043883 +0100
+++ gcc/varasm.c2020-12-09 13:36:00.888393542 +0100
@@ -1472,6 +1472,11 @@ make_decl_rtl (tree decl)
   else if (!targetm.hard_regno_mode_ok (reg_number, mode))
error ("register specified for %q+D isn%'t suitable for data type",
decl);
+  else if (reg_number != HARD_FRAME_POINTER_REGNUM
+  && (reg_number == FRAME_POINTER_REGNUM
+  || reg_number == ARG_POINTER_REGNUM))
+   error ("register specified for %q+D is an implementation%'s internal"
+  " register", decl);
   /* Now handle properly declared static register variables.  */
   else
{
--- gcc/testsuite/gcc.target/i386/pr92469.c.jj  2020-12-09 13:41:50.497501433 
+0100
+++ gcc/testsuite/gcc.target/i386/pr92469.c 2020-12-09 13:41:30.416724986 
+0100
@@ -0,0 +1,24 @@
+/* PR target/92469 */
+/* { dg-do compile } */
+/* { dg-options "-O0" } */
+
+void
+foo (void)
+{ 
+  register int x asm ("frame");/* { dg-error "register specified for 
'x' is an implementation's internal register" } */
+  int y = x;
+}
+
+void
+bar (void)
+{ 
+  register int x asm ("19");   /* { dg-error "register specified for 'x' is an 
implementation's internal register" } */
+  int y = x;
+}
+
+void
+baz (void)
+{ 
+  register int x asm ("argp"); /* { dg-error "register specified for 'x' is an 
implementation's internal register" } */
+  int y = x;
+}
--- gcc/testsuite/gcc.target/i386/pr79804.c.jj  2020-01-12 11:54:37.976389828 
+0100
+++ gcc/testsuite/gcc.target/i386/pr79804.c 2020-12-10 10:11:53.948939322 
+0100
@@ -4,7 +4,7 @@
 
 void foo (void)
 {
-  register int r19 asm ("19");
+  register int r19 asm ("19"); /* { dg-error "register specified for 'r19' is 
an implementation's internal register" } */
 
-  asm volatile ("# %0" : "=r"(r19));  /* { dg-error "invalid use of register" 
} */
-}  /* { dg-error "cannot be used in 'asm' here" } */
+  asm volatile ("# %0" : "=r"(r19));
+}
--- gcc/testsuite/gcc.target/i386/pr88178.c.jj  2020-01-12 11:54:37.983389722 
+0100
+++ gcc/testsuite/gcc.target/i386/pr88178.c 2020-12-10 10:12:30.140535022 
+0100
@@ -4,5 +4,5 @@
 
 void foo (void)
 {
-  register int r19 asm ("19");
+  register int r19 asm ("19"); /* { dg-error "register specified for 'r19' is 
an implementation's internal register" } */
 }

Jakub

[PATCH] dojump: Improve float != comparisons on x86 [PR98212]

2020-12-10 Thread Jakub Jelinek via Gcc-patches

Hi!

The x86 backend doesn't have EQ or NE floating point comparisons,
so it splits x != y into x unord y || x <> y.  The problem with that is
that the unord comparison doesn't trap on qNaN operands but LTGT does.
The end effect is that it doesn't trap on qNaN operands, because x unord y
will be true for those and so LTGT will not be performed, but as the backend
is currently unable to merge signalling and non-signalling comparisons (and
after all, with this exact exception it shouldn't unless the first one is
signalling and the second one is non-signalling) it means we end up with:
ucomiss %xmm1, %xmm0
jp  .L4
comiss  %xmm1, %xmm0
jne .L4
ret
.p2align 4,,10
.p2align 3
.L4:
xorl%eax, %eax
jmp foo
where the comiss is the signalling comparison, but we already know that
the right flags bits are already computed by the ucomiss insn.

The following patch, if target supports UNEQ comparisons, splits NE
as x unord y || !(x uneq y) instead, which in the end means we end up with
just:
ucomiss %xmm1, %xmm0
jp  .L4
jne .L4
ret
.p2align 4,,10
.p2align 3
.L4:
jmp foo
because UNEQ is like UNORDERED non-signalling.

Bootstrapped/regtested on {x86_64,i686,powerpc64le,aarch64,armv7hl}-linux,
ok for trunk?

2020-12-10  Jakub Jelinek  

PR rtl-optimization/98212
* dojump.c (do_compare_rtx_and_jump): When splitting NE and backend
can do UNEQ, prefer splitting x != y into x unord y || !(x uneq y)
instead of into x unord y || x ltgt y.

* gcc.target/i386/pr98212.c: New test.

--- gcc/dojump.c.jj 2020-12-09 15:25:43.827258452 +0100
+++ gcc/dojump.c2020-12-09 15:11:17.042888002 +0100
@@ -1168,6 +1168,23 @@ do_compare_rtx_and_jump (rtx op0, rtx op
  profile_probability first_prob = prob.split (cprob);
  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, 
mode,
   size, NULL, if_true_label, 
first_prob);
+ if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump))
+   {
+ /* x != y can be split into x unord y || x ltgt y
+or x unord y || !(x uneq y).  The latter has the
+advantage that both comparisons are non-signalling and
+so there is a higher chance that the RTL optimizations
+merge the two comparisons into just one.  */
+ code = UNEQ;
+ prob = prob.invert ();
+ if (! if_false_label)
+   {
+ if (! dummy_label)
+   dummy_label = gen_label_rtx ();
+ if_false_label = dummy_label;
+   }
+ std::swap (if_false_label, if_true_label);
+   }
}
}
}
--- gcc/testsuite/gcc.target/i386/pr98212.c.jj  2020-12-09 15:33:11.930279183 
+0100
+++ gcc/testsuite/gcc.target/i386/pr98212.c 2020-12-09 15:24:43.933923980 
+0100
@@ -0,0 +1,21 @@
+/* PR rtl-optimization/98212 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mfpmath=sse -mno-avx" } */
+/* { dg-final { scan-assembler-times "\tucomiss\t" 2 } } */
+/* { dg-final { scan-assembler-not "\tcomiss\t" } } */
+
+void foo (void);
+
+void
+bar (float a, float b)
+{
+  if (a != b)
+foo ();
+}
+
+void
+baz (float a, float b)
+{
+  if (a == b)
+foo ();
+}

Jakub

Re: [PATCH][GCC10][1/6] arm: Add vld1_lane_bf16 + vldq_lane_bf16 intrinsics

2020-12-10 Thread Andrea Corallo via Gcc-patches

Andrea Corallo via Gcc-patches  writes:

> Hi all,
>
> first patch of the series to backport a number of bfloat16 intrinsics from
> trunk to gcc-10.
>
> These patches include the fixes to the tests that we have applied
> to master.
>
> Please refer to:
> ACLE 
> ISA  
>
> The series has been bootstrapped on arm-linux-gnueabihf and regtested.
>
> Okay for gcc-10?
>
> Thanks
>
>   Andrea

Pinging this and the whole series.

Thanks

  Andrea

[PATCH] dojump: Fix up probabilities splitting in dojump.c comparison splitting [PR98212]

2020-12-10 Thread Jakub Jelinek via Gcc-patches

Hi!

When compiling:
void foo (void);
void bar (float a, float b) { if (__builtin_expect (a != b, 1)) foo (); }
void baz (float a, float b) { if (__builtin_expect (a == b, 1)) foo (); }
void qux (float a, float b) { if (__builtin_expect (a != b, 0)) foo (); }
void corge (float a, float b) { if (__builtin_expect (a == b, 0)) foo (); }
on x86_64, we get (unimportant cruft removed):
bar:ucomiss %xmm1, %xmm0
jp  .L4
je  .L1
.L4:jmp foo
.L1:ret
baz:ucomiss %xmm1, %xmm0
jp  .L6
jne .L6
jmp foo
.L6:ret
qux:ucomiss %xmm1, %xmm0
jp  .L13
jne .L13
ret
.L13:   jmp foo
corge:  ucomiss %xmm1, %xmm0
jnp .L18
.L14:   ret
.L18:   jne .L14
jmp foo
(note for bar and qux that changed with a patch I've posted earlier today).
This is all reasonable, except the last function, the overall jump to
the tail call is predicted unlikely (10%), so it is good jmp foo isn't on
the straight line path, but NaNs are (or should be) considered very unlikely
in the programs, so IMHO the right code (and one emitted with the following
patch) is:
corge:  ucomiss %xmm1, %xmm0
jp  .L14
je  .L18
.L14:   ret
.L18:   jmp foo

When splitting conditions, we have the original prob as the probability of
jumping to the true label and then use a cprob set to 99% for ORDERED and 1%
for UNORDERED.  For !and_them, we end up splitting
if (x) goto true; // prob
goto false;
into:
if (y) goto true; // prob * cprob (== first_prob)
if (z) goto true; // adjusted prob
goto false;
with first_prob = prob.split (cprob); being the computation.

For and_them, we instead split:
if (x) goto true; // prob
goto false;
into:
if (y) goto false; // 1 - first_prob
if (z) goto true; // adjusted prob
goto false;
and first_prob being computed as:
prob = prob.invert ();
first_prob = prob.split (cprob).invert ();
prob = prob.invert ();
This sort of works if the initial probability is likely (larger than even),
but with the original prob being e.g. 10%, we compute first_prob as 10.9%
(1 - ((1 - 0.1) * 0.99)), as EQ splits into ORDERED && something, but
we use first_prob on the recursive call where we pass ORDERED, the false
label and a NULL true label.  That means we predict that the case where neither
argument is NaN has a probability of only 10.9%, which is unrealistically low.

The following patch uses a different computation, taking into account
that for the and_them case, if first_code is ORDERED that
first_prob.invert () should be the probability of the inverted comparison
and UNORDERED should be considered unlikely, so it uses
first_prob = prob.split (cprob.invert ()).invert ();
without the two prob = prob.invert (); around it and that seems to work
fine for all the cases, but I'm not 100% sure if that is right.
Or in the PR there is yet another variant that also seems to work.

Bootstrapped/regtested on x86_64-linux and i686-linux.

2020-12-10  Jakub Jelinek  

PR rtl-optimization/98212
* dojump.c (do_compare_rtx_and_jump): Change computation of
first_prob for and_them and don't invert prob around it.

* gcc.dg/predict-8.c: Adjust expected probability.

--- gcc/dojump.c.jj 2020-12-09 15:11:17.042888002 +0100
+++ gcc/dojump.c2020-12-09 20:05:59.535234206 +0100
@@ -1148,9 +1148,8 @@ do_compare_rtx_and_jump (rtx op0, rtx op
  if (and_them)
{
  rtx_code_label *dest_label;
- prob = prob.invert ();
- profile_probability first_prob = prob.split (cprob).invert ();
- prob = prob.invert ();
+ profile_probability first_prob
+   = prob.split (cprob.invert ()).invert ();
  /* If we only jump if true, just bypass the second jump.  */
  if (! if_false_label)
{
--- gcc/testsuite/gcc.dg/predict-8.c.jj 2020-01-12 11:54:37.506396918 +0100
+++ gcc/testsuite/gcc.dg/predict-8.c2020-12-10 10:34:00.632123682 +0100
@@ -8,4 +8,4 @@ int foo(float a, float b) {
 return 2;
 }
 
-/* { dg-final { scan-rtl-dump-times "65.\[34]. .guessed" 2 "expand"} } */
+/* { dg-final { scan-rtl-dump-times "99.\[345]. .guessed" 2 "expand"} } */

Jakub

[committed] openmp: Fix ICE with broken doacross loop [PR98205]

2020-12-10 Thread Jakub Jelinek via Gcc-patches

Hi!

If the loop body doesn't ever continue, we don't have a bb to insert the
updates.  Fixed by not adding them at all in that case.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2020-12-10  Jakub Jelinek  

PR middle-end/98205
* omp-expand.c (expand_omp_for_generic): Fix up broken_loop handling.

* c-c++-common/gomp/doacross-4.c: New test.

--- gcc/omp-expand.c.jj 2020-11-27 11:25:04.567490804 +0100
+++ gcc/omp-expand.c2020-12-09 12:41:39.734730025 +0100
@@ -4304,13 +4304,18 @@ expand_omp_for_generic (struct omp_regio
  gsi = gsi_last_bb (l0_bb);
  expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
   istart0, true);
- gsi = gsi_last_bb (cont_bb);
- t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
-  build_int_cst (fd->iter_type, 1));
- expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
- tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
- size_zero_node, NULL_TREE, NULL_TREE);
- expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
+ if (cont_bb)
+   {
+ gsi = gsi_last_bb (cont_bb);
+ t = fold_build2 (PLUS_EXPR, fd->iter_type,
+  counts[fd->collapse - 1],
+  build_int_cst (fd->iter_type, 1));
+ expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
+ tree aref = build4 (ARRAY_REF, fd->iter_type,
+ counts[fd->ordered], size_zero_node,
+ NULL_TREE, NULL_TREE);
+ expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
+   }
  t = counts[fd->collapse - 1];
}
   else if (fd->collapse > 1)
--- gcc/testsuite/c-c++-common/gomp/doacross-4.c.jj 2020-12-09 
13:00:07.295383311 +0100
+++ gcc/testsuite/c-c++-common/gomp/doacross-4.c2020-12-09 
12:59:28.894811370 +0100
@@ -0,0 +1,30 @@
+/* PR middle-end/98205 */
+
+void baz (int) __attribute__((noreturn));
+
+void
+foo (int n)
+{
+  int i;
+  #pragma omp for ordered(1)
+  for (i = 0; i < 8; i += n)
+{
+  #pragma omp ordered depend(source)
+  #pragma omp ordered depend(sink: i - 2)
+  baz (i);
+}
+}
+
+void
+bar (int n)
+{
+  int i, j;
+  #pragma omp for collapse(2) ordered(2)
+  for (i = 0; i < 8; i += n)
+for (j = 0; j < 8; j += n)
+  {
+#pragma omp ordered depend(source)
+#pragma omp ordered depend(sink: i - 2, j + 2)
+baz (i);
+  }
+}

Jakub

1 2 >

1 - 100 of 114 matches

Mail list logo