Re: C++ PATCH for range-for tweak

2016-03-15 Thread Florian Weimer
* Jason Merrill:

> On 03/14/2016 05:30 PM, Florian Weimer wrote:
>> * Jason Merrill:
>>
>>> P08184R0: Generalizing the Range-Based For Loop
>>
>> How can one resolve this reference?  It's obviously not a PR number in
>> GCC Bugzilla.
>>
>> I found this after some searching:
>>
>> 
>>
>> But it lacks the additional “8”.
>
> Oops, typo.  Fixed, along with adjusting the feature-test macro.

Ah.  Looks like there are still references remaining in the tree:

gcc/cp/parser.c:/* P08184R0 allows __begin and __end to have 
different types,
gcc/testsuite/g++.dg/cpp1z/range-for1.C:// P08184R0: Generalizing the 
Range-Based For Loop


Re: [PATCH, match] Fix pr68714

2016-03-15 Thread Andreas Schwab
Richard Henderson  writes:

> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr68714.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr68714.c
> new file mode 100644
> index 000..741d311
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr68714.c
> @@ -0,0 +1,9 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +typedef int vec __attribute__((vector_size(16)));
> +vec f(vec x,vec y){
> +  return x<y|x==y;
> +}
> +
> +/* { dg-final { scan-tree-dump-times " <= " 1 "optimized" } } */

That fails on ia64:

$ grep " <= " pr68714.c.211t.optimized 
  _10 = _8 <= _9 ? -1 : 0;
  _13 = _11 <= _12 ? -1 : 0;
  _16 = _14 <= _15 ? -1 : 0;

Andreas.

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."


Save call-clobbered registers in _mcount on 32-bit Solaris/x86 (PR target/38239)

2016-03-15 Thread Rainer Orth
I've recently been pointed at PR target/38239, where trivial programs
compiled/linked with -pg SEGV on 32-bit Solaris/x86 since _mcount
clobbers %ecx.  It was initially reported for gcc 4.3, but I couldn't
reproduce it there and the testcase works fine up to and including gcc
4.8.  Since 4.9, the SEGV reported occurs again.

Upon closer investigation (checking i386.c (x86_function_profiler),
final.c (profile_function) and the _mcount implementations of glibc and
BSDs), it turns out that _mcount isn't called like a regular function,
but is expected to save and restore call-clobbered registers itself.
Unfortunately, there's no specification on how _mcount is expected to
behave.

The fix is trivial, of course.  Bootstrapped without regressions on
i386-pc-solaris2.1[012].  Will install on mainline soon, and on the 5
and 4.9 branches in about a week, given that this is a longstanding
regression that makes -pg profiling unusable/unreliable.

Rainer


2016-03-11  Rainer Orth  

PR target/38239
* config/sol2/gmon.c [__i386__] (_mcount): Save and restore
call-clobbered registers.
(internal_mcount): Remove __i386__ handling.

# HG changeset patch
# Parent  2a1815d64dfb93b0ac1d59c094d4da5347634d38
Save call-clobbered registers in _mcount on 32-bit Solaris/x86 (PR target/38239)

diff --git a/libgcc/config/sol2/gmon.c b/libgcc/config/sol2/gmon.c
--- a/libgcc/config/sol2/gmon.c
+++ b/libgcc/config/sol2/gmon.c
@@ -44,11 +44,7 @@
 
 extern void monstartup (char *, char *);
 extern void _mcleanup (void);
-#ifdef __i386__
-static void internal_mcount (void) __attribute__ ((used));
-#else
 static void internal_mcount (char *, unsigned short *) __attribute__ ((used));
-#endif
 static void moncontrol (int);
 
 struct phdr {
@@ -223,8 +219,19 @@ void
 /* Solaris 2 libraries use _mcount.  */
 #if defined __i386__
 asm(".globl _mcount\n"
+"	.type	_mcount, @function\n"
 "_mcount:\n"
-"	jmp	internal_mcount\n");
+/* Save and restore the call-clobbered registers.  */
+"	pushl	%eax\n"
+"	pushl	%ecx\n"
+"	pushl	%edx\n"
+"	movl	12(%esp), %edx\n"
+"	movl	4(%ebp), %eax\n"
+"	call	internal_mcount\n"
+"	popl	%edx\n"
+"	popl	%ecx\n"
+"	popl	%eax\n"
+"	ret\n");
 #elif defined __x86_64__
 /* See GLIBC for additional information about this technique.  */
 asm(".globl _mcount\n" 
@@ -299,32 +306,13 @@ asm(".global _mcount\n"
 #endif
 
 static void
-#ifdef __i386__
-internal_mcount (void)
-#else
 internal_mcount (char *selfpc, unsigned short *frompcindex)
-#endif
 {
   struct tostruct *top;
   struct tostruct *prevtop;
   long toindex;
   static char already_setup;
 
-#ifdef __i386__
-  char *selfpc;
-  unsigned short *frompcindex;
-
-  /* Find the return address for mcount and the return address for mcount's
- caller.  */
-
-  /* selfpc = pc pushed by mcount call.
- This identifies the function that was just entered.  */
-  selfpc = (void *) __builtin_return_address (0);
-  /* frompcindex = pc in preceding frame.
- This identifies the caller of the function just entered.  */
-  frompcindex = (void *) __builtin_return_address (1);
-#endif
-
 /* Only necessary without the Solaris CRTs or a proper gcrt1.o, otherwise
crtpg.o or gcrt1.o take care of that.
 
# HG changeset patch
# Parent  bc0ec420e463ad63db543a27592dd7dca577b7ad
Save call-clobbered registers in _mcount on 32-bit Solaris/x86 (PR target/38239)

diff --git a/libgcc/config/gmon-sol2.c b/libgcc/config/gmon-sol2.c
--- a/libgcc/config/gmon-sol2.c
+++ b/libgcc/config/gmon-sol2.c
@@ -43,11 +43,7 @@
 
 extern void monstartup (char *, char *);
 extern void _mcleanup (void);
-#ifdef __i386__
-static void internal_mcount (void) __attribute__ ((used));
-#else
 static void internal_mcount (char *, unsigned short *) __attribute__ ((used));
-#endif
 static void moncontrol (int);
 
 struct phdr {
@@ -222,8 +218,19 @@ void
 /* Solaris 2 libraries use _mcount.  */
 #if defined __i386__
 asm(".globl _mcount\n"
+"	.type	_mcount, @function\n"
 "_mcount:\n"
-"	jmp	internal_mcount\n");
+/* Save and restore the call-clobbered registers.  */
+"	pushl	%eax\n"
+"	pushl	%ecx\n"
+"	pushl	%edx\n"
+"	movl	12(%esp), %edx\n"
+"	movl	4(%ebp), %eax\n"
+"	call	internal_mcount\n"
+"	popl	%edx\n"
+"	popl	%ecx\n"
+"	popl	%eax\n"
+"	ret\n");
 #elif defined __x86_64__
 /* See GLIBC for additional information about this technique.  */
 asm(".globl _mcount\n" 
@@ -298,32 +305,13 @@ asm(".global _mcount\n"
 #endif
 
 static void
-#ifdef __i386__
-internal_mcount (void)
-#else
 internal_mcount (char *selfpc, unsigned short *frompcindex)
-#endif
 {
   struct tostruct *top;
   struct tostruct *prevtop;
   long toindex;
   static char already_setup;
 
-#ifdef __i386__
-  char *selfpc;
-  unsigned short *frompcindex;
-
-  /* Find the return address for mcount and the return address for mcount's
- caller.  */
-
-  /* selfpc = pc pushed by mcount call.
- This identi

Re: [PATCH][GCC 7] Fix PR70171

2016-03-15 Thread Eric Botcazou
> It looks like it might catch a few extra cases where the address of the
> decl is required.  But it also looks like it's somewhat overly broad like
> 
>   /* Function types that are TREE_ADDRESSABLE force return in memory.  */
>   if (fntype && TREE_ADDRESSABLE (fntype))
> return 1;
> 
> without actually testing 'exp' is the return slot.

I think that this particular case can only trigger in Ada. ;-)

-- 
Eric Botcazou


C++ PATCH to fix missing warning (PR c++/70194)

2016-03-15 Thread Marek Polacek
This is to fix missing "address of %qD will never be NULL" warning that went
away since the delayed folding merge.  The problem was that cp_build_binary_op
was getting unfolded ops so in the constexpr case it saw "(int *) p" instead of
"&i" (in this particular testcase).  Fixed by calling fold_non_dependent_expr
as is done elsewhere.
(It doesn't seem like the "if (CONVERT_EXPR_P (op?)" blocks need to use cop?
too.)

I did not try to address the other issues Martin has raised in the PR yet.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2016-03-15  Marek Polacek  

PR c++/70194
* typeck.c (cp_build_binary_op): Call fold_non_dependent_expr before
warning about an address not being null.

* g++.dg/warn/constexpr-70194.C: New test.

diff --git gcc/cp/typeck.c gcc/cp/typeck.c
index 20f0afc..a789c7a 100644
--- gcc/cp/typeck.c
+++ gcc/cp/typeck.c
@@ -4520,14 +4520,16 @@ cp_build_binary_op (location_t location,
  else
result_type = type0;
 
- if (TREE_CODE (op0) == ADDR_EXPR
- && decl_with_nonnull_addr_p (TREE_OPERAND (op0, 0)))
+ tree cop0 = fold_non_dependent_expr (op0);
+
+ if (TREE_CODE (cop0) == ADDR_EXPR
+ && decl_with_nonnull_addr_p (TREE_OPERAND (cop0, 0)))
{
  if ((complain & tf_warning)
  && c_inhibit_evaluation_warnings == 0
- && !TREE_NO_WARNING (op0))
+ && !TREE_NO_WARNING (cop0))
warning (OPT_Waddress, "the address of %qD will never be NULL",
-TREE_OPERAND (op0, 0));
+TREE_OPERAND (cop0, 0));
}
 
  if (CONVERT_EXPR_P (op0)
@@ -4559,14 +4561,16 @@ cp_build_binary_op (location_t location,
  else
result_type = type1;
 
- if (TREE_CODE (op1) == ADDR_EXPR 
- && decl_with_nonnull_addr_p (TREE_OPERAND (op1, 0)))
+ tree cop1 = fold_non_dependent_expr (op1);
+
+ if (TREE_CODE (cop1) == ADDR_EXPR
+ && decl_with_nonnull_addr_p (TREE_OPERAND (cop1, 0)))
{
  if ((complain & tf_warning)
  && c_inhibit_evaluation_warnings == 0
- && !TREE_NO_WARNING (op1))
+ && !TREE_NO_WARNING (cop1))
warning (OPT_Waddress, "the address of %qD will never be NULL",
-TREE_OPERAND (op1, 0));
+TREE_OPERAND (cop1, 0));
}
 
  if (CONVERT_EXPR_P (op1)
diff --git gcc/testsuite/g++.dg/warn/constexpr-70194.C 
gcc/testsuite/g++.dg/warn/constexpr-70194.C
index e69de29..cdc56c0 100644
--- gcc/testsuite/g++.dg/warn/constexpr-70194.C
+++ gcc/testsuite/g++.dg/warn/constexpr-70194.C
@@ -0,0 +1,12 @@
+// PR c++/70194
+// { dg-do compile { target c++11 } }
+// { dg-options "-Wall" }
+
+int i;
+
+const bool b0 = &i == 0; // { dg-warning "the address of .i. will never be 
NULL" }
+constexpr int *p = &i;
+const bool b1 = p == 0; // { dg-warning "the address of .i. will never be 
NULL" }
+const bool b2 = 0 == p; // { dg-warning "the address of .i. will never be 
NULL" }
+const bool b3 = p != 0; // { dg-warning "the address of .i. will never be 
NULL" }
+const bool b4 = 0 != p; // { dg-warning "the address of .i. will never be 
NULL" }

Marek


Re: [PATCH, match] Fix pr68714

2016-03-15 Thread Richard Biener
On Tue, 15 Mar 2016, Andreas Schwab wrote:

> Richard Henderson  writes:
> 
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr68714.c 
> > b/gcc/testsuite/gcc.dg/tree-ssa/pr68714.c
> > new file mode 100644
> > index 000..741d311
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr68714.c
> > @@ -0,0 +1,9 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -fdump-tree-optimized" } */
> > +
> > +typedef int vec __attribute__((vector_size(16)));
> > +vec f(vec x,vec y){
> > +  return x<y|x==y;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-times " <= " 1 "optimized" } } */
> 
> That fails on ia64:
> 
> $ grep " <= " pr68714.c.211t.optimized 
>   _10 = _8 <= _9 ? -1 : 0;
>   _13 = _11 <= _12 ? -1 : 0;
>   _16 = _14 <= _15 ? -1 : 0;

Probably on all targets that don't support V4SImode vectors.  Though
three cond_exprs is odd ;)  I suppose we got one DImode and two SImode
but that would be odd behavior from veclower...

Richard.


[patch] Fix PR bootstrap/69513

2016-03-15 Thread Eric Botcazou
Hi,

this is the failure of the LTO bootstrap with profile-directed optimization 
when compiling gnat1 with debug info.  The issue is that flush_limbo_die_list 
doesn't actually fully flush the limbo list because it calls get_context_die, 
which calls force_decl_die, which can put new DIEs on the limbo list in LTO
mode when nested functions are put in a different partition than that of their 
parent function.

Richard, this is the version of the patch that you said you would approve.
Tested on x86_64-suse-linux, is it OK for GCC 6 or must it wait until GCC 7?


2016-03-15  Eric Botcazou  

PR bootstrap/69513
* dwarf2out.c (flush_limbo_die_list): Really flush the limbo list.

-- 
Eric Botcazou
Index: dwarf2out.c
===
--- dwarf2out.c	(revision 234171)
+++ dwarf2out.c	(working copy)
@@ -27281,12 +27281,15 @@ optimize_location_lists (dw_die_ref die)
 static void
 flush_limbo_die_list (void)
 {
-  limbo_die_node *node, *next_node;
+  limbo_die_node *node;
 
-  for (node = limbo_die_list; node; node = next_node)
+  /* get_context_die calls force_decl_die, which can put new DIEs on the
+ limbo list in LTO mode when nested functions are put in a different
+ partition than that of their parent function.  */
+  while ((node = limbo_die_list))
 {
   dw_die_ref die = node->die;
-  next_node = node->next;
+  limbo_die_list = node->next;
 
   if (die->die_parent == NULL)
 	{
@@ -27324,8 +27327,6 @@ flush_limbo_die_list (void)
 	}
 	}
 }
-
-  limbo_die_list = NULL;
 }
 
 /* Output stuff that dwarf requires at the end of every file,


Re: C++ PATCH to fix missing warning (PR c++/70194)

2016-03-15 Thread Jakub Jelinek
On Tue, Mar 15, 2016 at 11:41:20AM +0100, Marek Polacek wrote:
> This is to fix missing "address of %qD will never be NULL" warning that went
> away since the delayed folding merge.  The problem was that cp_build_binary_op
> was getting unfolded ops so in the constexpr case it saw "(int *) p" instead 
> of
> "&i" (in this particular testcase).  Fixed by calling fold_non_dependent_expr
> as is done elsewhere.
> (It doesn't seem like the "if (CONVERT_EXPR_P (op?)" blocks need to use cop?
> too.)
> 
> I did not try to address the other issues Martin has raised in the PR yet.
> 
> Bootstrapped/regtested on x86_64-linux, ok for trunk?
> 
> 2016-03-15  Marek Polacek  
> 
>   PR c++/70194
>   * typeck.c (cp_build_binary_op): Call fold_non_dependent_expr before
>   warning about an address not being null.
> 
>   * g++.dg/warn/constexpr-70194.C: New test.
> 
> diff --git gcc/cp/typeck.c gcc/cp/typeck.c
> index 20f0afc..a789c7a 100644
> --- gcc/cp/typeck.c
> +++ gcc/cp/typeck.c
> @@ -4520,14 +4520,16 @@ cp_build_binary_op (location_t location,
> else
>   result_type = type0;
>  
> -   if (TREE_CODE (op0) == ADDR_EXPR
> -   && decl_with_nonnull_addr_p (TREE_OPERAND (op0, 0)))
> +   tree cop0 = fold_non_dependent_expr (op0);
> +
> +   if (TREE_CODE (cop0) == ADDR_EXPR
> +   && decl_with_nonnull_addr_p (TREE_OPERAND (cop0, 0)))

From compile time perspective, I wonder if it wouldn't be better to do
the cheap tests early, like:
if (warn_address
&& (complain & tf_warning)
&& c_inhibit_evaluation_warnings == 0
&& !TREE_NO_WARNING (op0))
  {
tree cop0 = fold_non_dependent_expr (op0);

if (TREE_CODE (cop0) == ADDR_EXPR
&& decl_with_nonnull_addr_p (TREE_OPERAND (cop0, 0))
&& !TREE_NO_WARNING (cop0))
  warning (OPT_waddress, "the address of %qD will never be NULL",
   TREE_OPERAND (cop0, 0));
  }

thus perform fold_non_dependent_expr only if it is needed.
Furthermore, I wonder if it isn't preferable to %qD the non-folded
expression (if it is ADDR_EXPR, that is), so perhaps:
TREE_OPERAND (TREE_CODE (op0) == ADDR_EXPR ? op0 : cop0, 0)
?

Jakub


[PATCH] Fix combine's simplify_shift_const_1 (PR rtl-optimization/70222)

2016-03-15 Thread Jakub Jelinek
Hi!

As mentioned in the PR, if we have
simplify_shift_const_1 (code=LSHIFTRT, result_mode=SImode, 
varop=0x7184de70, orig_count=31)
where varop is:
(subreg:SI (lshiftrt:DI (const_int -1 [0xffffffffffffffff])
(subreg:QI (reg:SI 100) 0)) 0),
we optimize the inner shift (mode == DImode) into
(lshiftrt:DI (const_int 8589934591 [0x1ffffffff])
(subreg:QI (reg:SI 100) 0)) 0)
but because result_mode == shift_mode == SImode (only mode == DImode),
we don't perform the needed masking, the outer shift is with new
count == 0 and thus not done at all.
Seems testsuite coverage for this function is very low and I'm sure there
are a lot of other bugs in that function; I've performed bootstrap/regtest
on x86_64-linux and i686-linux with this patch (without the && orig_count !=
0 && outer_op == UNKNOWN && !complement_p) part, with additional gathering
of interesting calls to this function, see the PR for some details,
but the summary is that this patch triggers only on the new testcases and
nothing else during bootstrap/regtest, fixes the testcase and while in
theory it could pessimize some cases (either by adding some obviously
correct, but unneeded AND around it, or by combiner not combining
something), it shouldn't break stuff (which is why I've added the
extra checks, if there are outer ops (outer_op or complement_p), the
patch could change the generated code and break something).
For GCC 7, I believe we should try to add sufficient testsuite coverage for
all the different cases (e.g. for all the cases where it sets count == 0,
in each case with mode != result_mode and mode == result_mode).

Ok for trunk?

2016-03-15  Jakub Jelinek  

PR rtl-optimization/70222
* combine.c (simplify_shift_const_1): Force simplify_and_const_int
for LSHIFTRT if result_mode != mode && count == 0 and no outer
operation is scheduled.

* gcc.c-torture/execute/pr70222-1.c: New test.
* gcc.c-torture/execute/pr70222-2.c: New test.

--- gcc/combine.c.jj2016-03-14 23:18:37.958408627 +0100
+++ gcc/combine.c   2016-03-15 11:47:24.474968676 +0100
@@ -10835,8 +10835,16 @@ simplify_shift_const_1 (enum rtx_code co
 x = simplify_gen_binary (code, shift_mode, varop, GEN_INT (count));
 
   /* If we were doing an LSHIFTRT in a wider mode than it was originally,
- turn off all the bits that the shift would have turned off.  */
-  if (orig_code == LSHIFTRT && result_mode != shift_mode)
+ turn off all the bits that the shift would have turned off.
+ Similarly do this if we've optimized varop so that we don't perform
+ any shift.  */
+  if (orig_code == LSHIFTRT
+  && (result_mode != shift_mode
+ || (result_mode != mode
+ && count == 0
+ && orig_count != 0
+ && outer_op == UNKNOWN
+ && !complement_p)))
 x = simplify_and_const_int (NULL_RTX, shift_mode, x,
GET_MODE_MASK (result_mode) >> orig_count);
 
--- gcc/testsuite/gcc.c-torture/execute/pr70222-1.c.jj  2016-03-15 
11:30:41.657000384 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr70222-1.c 2016-03-15 
11:30:41.657000384 +0100
@@ -0,0 +1,30 @@
+/* PR rtl-optimization/70222 */
+
+int a = 1;
+unsigned int b = 2;
+int c = 0;
+int d = 0;
+
+void
+foo ()
+{
+  int e = ((-(c >= c)) < b) > ((int) (-1ULL >> ((a / a) * 15)));
+  d = -e;
+}
+
+__attribute__((noinline, noclone)) void
+bar (int x)
+{
+  if (x != -1)
+__builtin_abort ();
+}
+
+int
+main ()
+{
+#if __CHAR_BIT__ == 8 && __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
+  foo ();
+  bar (d);
+#endif
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/pr70222-2.c.jj  2016-03-15 
11:36:13.273366841 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr70222-2.c 2016-03-15 
11:36:18.156298614 +0100
@@ -0,0 +1,20 @@
+/* PR rtl-optimization/70222 */
+
+#if __CHAR_BIT__ == 8 && __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
+__attribute__((noinline, noclone)) unsigned int
+foo (int x)
+{
+  unsigned long long y = -1ULL >> x;
+  return (unsigned int) y >> 31;
+}
+#endif
+
+int
+main ()
+{
+#if __CHAR_BIT__ == 8 && __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
+  if (foo (15) != 1 || foo (32) != 1 || foo (33) != 0)
+__builtin_abort ();
+#endif
+  return 0;
+}

Jakub


Re: [patch] Fix PR bootstrap/69513

2016-03-15 Thread Richard Biener
On Tue, 15 Mar 2016, Eric Botcazou wrote:

> Hi,
> 
> this is the failure of the LTO bootstrap with profile-directed optimization 
> when compiling gnat1 with debug info.  The issue is that flush_limbo_die_list 
> doesn't actually fully flush the limbo list because it calls get_context_die, 
> which calls force_decl_die, which can put new DIEs on the limbo list in LTO
> mode when nested functions are put in a different partition than that of 
> their 
> parent function.
> 
> Richard, this is the version of the patch that you said you would approve.
> Tested on x86_64-suse-linux, is it OK for GCC 6 or must it wait until GCC 7?

It's ok for GCC 6.

Thanks,
Richard.


> 2016-03-15  Eric Botcazou  
> 
>   PR bootstrap/69513
>   * dwarf2out.c (flush_limbo_die_list): Really flush the limbo list.


Re: [PATCH, PR70161] Fix fdump-ipa-all-graph

2016-03-15 Thread Richard Biener
On Mon, 14 Mar 2016, Tom de Vries wrote:

> Hi,
> 
> this patch fixes PR70161, a 4.9/5/6 regression.
> 
> Currently when using -fdump-ipa-all-graph, the compiler ICEs in
> execute_function_dump when testing for pass->graph_dump_initialized, because
> pass == NULL.
> 
> The patch fixes:
> - the ICE by setting the pass argument in the call to
>   execute_function_dump in execute_one_ipa_transform_pass
> - a subsequent ICE (triggered with -fipa-pta) by saving, resetting and
>   restoring dump_file_name in cgraph_node::get_body, alongside the
>   saving and restoring of the dump_file variable.
> - the duplicate edges in the subsequently generated dot file by
>   ensuring that execute_function_dump is called only once per function
>   per pass. [ Note that this bit also has an effect for the normal dump
>   files for the ipa passes with transform function. For those functions,
>   atm execute_function_dump is called both after execute and after
>   transform. With the patch, it's only called after transform. ]
> 
> Bootstrapped and reg-tested on x86_64.
> 
> OK for stage4?

Ok.

Thanks,
Richard.


[PATCH] Retry to emit global variables in HSA (PR hsa/70234)

2016-03-15 Thread Martin Liška
Hi.

As emission of a HSAIL function can fail for various reasons (-Whsa),
we must guarantee that a global variable is declared, and at most once.

Following patch does that, patch can survive make check-target-libgomp and
HSAILAsm is happy with BRIG output of declare_target-5.c source file.

Currently, I'm running bootstrap on x86_64-linux-gnu.
Ready to install after it finishes?

Thanks,
Martin

gcc/ChangeLog:

2016-03-15  Martin Liska  

PR hsa/70234
* hsa-brig.c (emit_function_directives): Mark unemitted
global variables for emission.
* hsa-gen.c (hsa_symbol::hsa_symbol): Initialize a new flag.
(get_symbol_for_decl): Likewise.
* hsa.h (struct hsa_symbol): New flag.
---
 gcc/hsa-brig.c |  2 ++
 gcc/hsa-gen.c  | 22 +++---
 gcc/hsa.h  |  3 +++
 3 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/gcc/hsa-brig.c b/gcc/hsa-brig.c
index 2a301be..9b6c0b8 100644
--- a/gcc/hsa-brig.c
+++ b/gcc/hsa-brig.c
@@ -643,6 +643,8 @@ emit_function_directives (hsa_function_representation *f, 
bool is_declaration)
   if (!f->m_declaration_p)
 for (int i = 0; f->m_global_symbols.iterate (i, &sym); i++)
   {
+   gcc_assert (!sym->m_emitted_to_brig);
+   sym->m_emitted_to_brig = true;
emit_directive_variable (sym);
brig_insn_count++;
   }
diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
index 5939a57..473d4bd 100644
--- a/gcc/hsa-gen.c
+++ b/gcc/hsa-gen.c
@@ -162,7 +162,7 @@ hsa_symbol::hsa_symbol ()
 m_directive_offset (0), m_type (BRIG_TYPE_NONE),
 m_segment (BRIG_SEGMENT_NONE), m_linkage (BRIG_LINKAGE_NONE), m_dim (0),
 m_cst_value (NULL), m_global_scope_p (false), m_seen_error (false),
-m_allocation (BRIG_ALLOCATION_AUTOMATIC)
+m_allocation (BRIG_ALLOCATION_AUTOMATIC), m_emitted_to_brig (false)
 {
 }
 
@@ -174,7 +174,7 @@ hsa_symbol::hsa_symbol (BrigType16_t type, BrigSegment8_t 
segment,
 m_directive_offset (0), m_type (type), m_segment (segment),
 m_linkage (linkage), m_dim (0), m_cst_value (NULL),
 m_global_scope_p (global_scope_p), m_seen_error (false),
-m_allocation (allocation)
+m_allocation (allocation), m_emitted_to_brig (false)
 {
 }
 
@@ -880,11 +880,27 @@ get_symbol_for_decl (tree decl)
   gcc_checking_assert (slot);
   if (*slot)
 {
+  hsa_symbol *sym = (*slot);
+
   /* If the symbol is problematic, mark current function also as
 problematic.  */
-  if ((*slot)->m_seen_error)
+  if (sym->m_seen_error)
hsa_fail_cfun ();
 
+  /* PR hsa/70234: If a global variable was marked to be emitted,
+but HSAIL generation of a function using the variable fails,
+we should retry to emit the variable in context of a different
+function.
+
+Iterate elements whether a symbol is already in m_global_symbols
+of not.  */
+  for (unsigned i = 0; i < hsa_cfun->m_global_symbols.length (); i++)
+   if (hsa_cfun->m_global_symbols[i] == sym)
+ return *slot;
+
+  if (is_in_global_vars && !sym->m_emitted_to_brig)
+   hsa_cfun->m_global_symbols.safe_push (sym);
+
   return *slot;
 }
   else
diff --git a/gcc/hsa.h b/gcc/hsa.h
index 6a7c651..1d6baab 100644
--- a/gcc/hsa.h
+++ b/gcc/hsa.h
@@ -110,6 +110,9 @@ struct hsa_symbol
   /* Symbol allocation.  */
   BrigAllocation m_allocation;
 
+  /* Flag used for global variables if a variable is already emitted or not.  
*/
+  bool m_emitted_to_brig;
+
 private:
   /* Default constructor.  */
   hsa_symbol ();
-- 
2.7.1



C++ PATCH for C++17 hex float feature test macro

2016-03-15 Thread Jason Merrill
Another C++17 feature that's trivial to implement, since it was already 
supported: hex floating-point literals.  We just need to define the 
feature-test macro and correctly remove it from strict C++11/14 modes.


Tested x86_64-pc-linux-gnu, applying to trunk.
commit 7fc42c9d978983956029b2b401682d1aa58d8d27
Author: Jason Merrill 
Date:   Fri Mar 4 21:38:21 2016 -0500

	* libcpp/expr.c (cpp_classify_number): Hex floats are new in C++1z.

	* libcpp/init.c (lang_defaults): Likewise.
	* gcc/c-family/c-cppbuiltin.c (c_cpp_builtins): Set __cpp_hex_float.

diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c
index dc1f426..ee953ca 100644
--- a/gcc/c-family/c-cppbuiltin.c
+++ b/gcc/c-family/c-cppbuiltin.c
@@ -818,6 +818,10 @@ c_cpp_builtins (cpp_reader *pfile)
   if (!pedantic || cxx_dialect > cxx11)
 	cpp_define (pfile, "__cpp_binary_literals=201304");
 
+  /* Similarly for hexadecimal floating point literals and C++17.  */
+  if (!pedantic || cpp_get_options (parse_in)->extended_numbers)
+	cpp_define (pfile, "__cpp_hex_float=201603");
+
   /* Arrays of runtime bound were removed from C++14, but we still
 	 support GNU VLAs.  Let's define this macro to a low number
 	 (corresponding to the initial test release of GNU C++) if we won't
diff --git a/gcc/testsuite/g++.dg/cpp/pr23827_cxx98_neg.C b/gcc/testsuite/g++.dg/cpp/pr23827_cxx98_neg.C
index 39d9fe4..a0e468c 100644
--- a/gcc/testsuite/g++.dg/cpp/pr23827_cxx98_neg.C
+++ b/gcc/testsuite/g++.dg/cpp/pr23827_cxx98_neg.C
@@ -1,4 +1,4 @@
 // { dg-do compile { target c++98_only } }
 /* { dg-options "-ansi -pedantic-errors" }  */
 
-double x = 0x3.1415babep0; // { dg-error "use of C..11 hexadecimal floating constant" }
+double x = 0x3.1415babep0; // { dg-error "use of C..1z hexadecimal floating constant" }
diff --git a/libcpp/expr.c b/libcpp/expr.c
index 5353bde..5cdca6f 100644
--- a/libcpp/expr.c
+++ b/libcpp/expr.c
@@ -552,7 +552,7 @@ cpp_classify_number (cpp_reader *pfile, const cpp_token *token,
 	{
 	  if (CPP_OPTION (pfile, cplusplus))
 	cpp_error_with_line (pfile, CPP_DL_PEDWARN, virtual_location, 0,
- "use of C++11 hexadecimal floating constant");
+ "use of C++1z hexadecimal floating constant");
 	  else
 	cpp_error_with_line (pfile, CPP_DL_PEDWARN, virtual_location, 0,
  "use of C99 hexadecimal floating constant");
diff --git a/libcpp/init.c b/libcpp/init.c
index 6bc4296..4343075 100644
--- a/libcpp/init.c
+++ b/libcpp/init.c
@@ -105,9 +105,9 @@ static const struct lang_flags lang_defaults[] =
   /* GNUCXX   */  { 0,  1,  1,  1,  0,  0,  1,   0,   0,   0,0, 0, 0,   0 },
   /* CXX98*/  { 0,  1,  0,  1,  0,  1,  1,   0,   0,   0,0, 0, 1,   0 },
   /* GNUCXX11 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,0, 0, 0,   0 },
-  /* CXX11*/  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,0, 0, 1,   0 },
+  /* CXX11*/  { 1,  1,  0,  1,  1,  1,  1,   1,   1,   1,0, 0, 1,   0 },
   /* GNUCXX14 */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,1, 1, 0,   0 },
-  /* CXX14*/  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,1, 1, 1,   0 },
+  /* CXX14*/  { 1,  1,  0,  1,  1,  1,  1,   1,   1,   1,1, 1, 1,   0 },
   /* GNUCXX1Z */  { 1,  1,  1,  1,  1,  0,  1,   1,   1,   1,1, 1, 0,   1 },
   /* CXX1Z*/  { 1,  1,  1,  1,  1,  1,  1,   1,   1,   1,1, 1, 0,   1 },
   /* ASM  */  { 0,  0,  1,  0,  0,  0,  0,   0,   0,   0,0, 0, 0,   0 }


Re: [PATCH] Fix combine's simplify_shift_const_1 (PR rtl-optimization/70222)

2016-03-15 Thread Bernd Schmidt

On 03/15/2016 12:14 PM, Jakub Jelinek wrote:

-  if (orig_code == LSHIFTRT && result_mode != shift_mode)
+ turn off all the bits that the shift would have turned off.
+ Similarly do this if we've optimized varop so that we don't perform
+ any shift.  */
+  if (orig_code == LSHIFTRT
+  && (result_mode != shift_mode
+ || (result_mode != mode
+ && count == 0
+ && orig_count != 0
+ && outer_op == UNKNOWN
+ && !complement_p)))


This looks really specialized, and I'd be worrying about whether it 
really is the right condition. Where exactly was the constant shifted by 
31 and count set to 0? Must be here, right?


   /* If we have (A << B << C) for any shift, we can convert this to
  (A << C << B).  This wins if A is a constant.  Only try this if
  B is not a constant.  */

   else if (GET_CODE (varop) == code
&& CONST_INT_P (XEXP (varop, 0))
&& !CONST_INT_P (XEXP (varop, 1)))
{
  rtx new_rtx = simplify_const_binary_operation (code, mode,
XEXP (varop, 0),
GEN_INT (count));
  varop = gen_rtx_fmt_ee (code, mode, new_rtx, XEXP (varop, 1));
  count = 0;
  continue;
}

I think it might be clearer to notice and fix the problem here (or set a 
need_mask flag).



Bernd




Re: C++ PATCH to fix missing warning (PR c++/70194)

2016-03-15 Thread Marek Polacek
On Tue, Mar 15, 2016 at 11:56:18AM +0100, Jakub Jelinek wrote:
> From compile time perspective, I wonder if it wouldn't be better to do
> the cheap tests early, like:
>   if (warn_address
>   && (complain & tf_warning)
>   && c_inhibit_evaluation_warnings == 0
>   && !TREE_NO_WARNING (op0))
> {
>   tree cop0 = fold_non_dependent_expr (op0);
> 
>   if (TREE_CODE (cop0) == ADDR_EXPR
>   && decl_with_nonnull_addr_p (TREE_OPERAND (cop0, 0))
>   && !TREE_NO_WARNING (cop0))
> warning (OPT_waddress, "the address of %qD will never be NULL",
>  TREE_OPERAND (cop0, 0));
> }
> thus perform fold_non_dependent_expr only if it is needed.

Ok, makes sense.

> Furthermore, I wonder if it isn't preferable to %qD the non-folded
> expression (if it is ADDR_EXPR, that is), so perhaps:
> TREE_OPERAND (TREE_CODE (op0) == ADDR_EXPR ? op0 : cop0, 0)
> ?

I tried this before but it gave the same output as with what I have now,
so I left this unchanged in this version...

Thanks.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2016-03-15  Marek Polacek  

PR c++/70194
* typeck.c (cp_build_binary_op): Call fold_non_dependent_expr before
warning about an address not being null.  Check cheap stuff first.

* g++.dg/warn/constexpr-70194.C: New test.

diff --git gcc/cp/typeck.c gcc/cp/typeck.c
index 20f0afc..5069e88 100644
--- gcc/cp/typeck.c
+++ gcc/cp/typeck.c
@@ -4520,14 +4520,18 @@ cp_build_binary_op (location_t location,
  else
result_type = type0;
 
- if (TREE_CODE (op0) == ADDR_EXPR
- && decl_with_nonnull_addr_p (TREE_OPERAND (op0, 0)))
+ if (warn_address
+ && (complain & tf_warning)
+ && c_inhibit_evaluation_warnings == 0
+ && !TREE_NO_WARNING (op0))
{
- if ((complain & tf_warning)
- && c_inhibit_evaluation_warnings == 0
- && !TREE_NO_WARNING (op0))
+ tree cop0 = fold_non_dependent_expr (op0);
+
+ if (TREE_CODE (cop0) == ADDR_EXPR
+ && decl_with_nonnull_addr_p (TREE_OPERAND (cop0, 0))
+ && !TREE_NO_WARNING (cop0))
warning (OPT_Waddress, "the address of %qD will never be NULL",
-TREE_OPERAND (op0, 0));
+TREE_OPERAND (cop0, 0));
}
 
  if (CONVERT_EXPR_P (op0)
@@ -4559,14 +4563,18 @@ cp_build_binary_op (location_t location,
  else
result_type = type1;
 
- if (TREE_CODE (op1) == ADDR_EXPR 
- && decl_with_nonnull_addr_p (TREE_OPERAND (op1, 0)))
+ if (warn_address
+ && (complain & tf_warning)
+ && c_inhibit_evaluation_warnings == 0
+ && !TREE_NO_WARNING (op1))
{
- if ((complain & tf_warning)
- && c_inhibit_evaluation_warnings == 0
- && !TREE_NO_WARNING (op1))
+ tree cop1 = fold_non_dependent_expr (op1);
+
+ if (TREE_CODE (cop1) == ADDR_EXPR
+ && decl_with_nonnull_addr_p (TREE_OPERAND (cop1, 0))
+ && !TREE_NO_WARNING (cop1))
warning (OPT_Waddress, "the address of %qD will never be NULL",
-TREE_OPERAND (op1, 0));
+TREE_OPERAND (cop1, 0));
}
 
  if (CONVERT_EXPR_P (op1)
diff --git gcc/testsuite/g++.dg/warn/constexpr-70194.C 
gcc/testsuite/g++.dg/warn/constexpr-70194.C
index e69de29..cdc56c0 100644
--- gcc/testsuite/g++.dg/warn/constexpr-70194.C
+++ gcc/testsuite/g++.dg/warn/constexpr-70194.C
@@ -0,0 +1,12 @@
+// PR c++/70194
+// { dg-do compile { target c++11 } }
+// { dg-options "-Wall" }
+
+int i;
+
+const bool b0 = &i == 0; // { dg-warning "the address of .i. will never be 
NULL" }
+constexpr int *p = &i;
+const bool b1 = p == 0; // { dg-warning "the address of .i. will never be 
NULL" }
+const bool b2 = 0 == p; // { dg-warning "the address of .i. will never be 
NULL" }
+const bool b3 = p != 0; // { dg-warning "the address of .i. will never be 
NULL" }
+const bool b4 = 0 != p; // { dg-warning "the address of .i. will never be 
NULL" }

Marek


Re: [PATCH] PR69195, Reload confused by invalid reg equivs

2016-03-15 Thread Bernd Schmidt

On 03/15/2016 03:27 AM, Alan Modra wrote:

On Mon, Mar 14, 2016 at 01:00:39PM -0600, Jeff Law wrote:

Right.  Tolerant as in not crash.


So can someone please approve my ira.c:indirect_jump_optimize patch?
I'm not quite audacious enough to claim it is obvious.


Looks good to me.


Bernd



[oacc,testsuite] Add goacc/kernels-alias{,-2}.f95

2016-03-15 Thread Tom de Vries

Hi,

I've translated the goacc/kernels-alias{,-2}.c testcases to fortran.

Committed to trunk.

Thanks,
- Tom
Add goacc/kernels-alias{,-2}.f95

2016-03-15  Tom de Vries  

	* gfortran.dg/goacc/kernels-alias-2.f95: New test.
	* gfortran.dg/goacc/kernels-alias.f95: New test.

---
 .../gfortran.dg/goacc/kernels-alias-2.f95  | 23 ++
 gcc/testsuite/gfortran.dg/goacc/kernels-alias.f95  | 23 ++
 2 files changed, 46 insertions(+)

diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-alias-2.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-alias-2.f95
new file mode 100644
index 000..7e348dd
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/kernels-alias-2.f95
@@ -0,0 +1,23 @@
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-fdump-tree-ealias-all" }
+
+program main
+  implicit none
+  integer, parameter :: n = 2
+  integer  :: a, b, c, d
+
+  !$acc kernels copyin (a) create (b) copyout (c) copy (d)
+  a = 0
+  b = 0
+  c = 0
+  d = 0
+  !$acc end kernels
+
+end program main
+
+! { dg-final { scan-tree-dump-times "clique 1 base 1" 4 "ealias" } }
+! { dg-final { scan-tree-dump-times "clique 1 base 2" 1 "ealias" } }
+! { dg-final { scan-tree-dump-times "clique 1 base 3" 1 "ealias" } }
+! { dg-final { scan-tree-dump-times "clique 1 base 4" 1 "ealias" } }
+! { dg-final { scan-tree-dump-times "clique 1 base 5" 1 "ealias" } }
+! { dg-final { scan-tree-dump-times "(?n)clique .* base .*" 8 "ealias" } }
diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-alias.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-alias.f95
new file mode 100644
index 000..8d6ccb3
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/kernels-alias.f95
@@ -0,0 +1,23 @@
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-fdump-tree-ealias-all" }
+
+program main
+  implicit none
+  integer, parameter :: n = 2
+  integer, dimension (0:n-1) :: a, b, c, d
+
+  !$acc kernels copyin (a) create (b) copyout (c) copy (d)
+  a(0) = 0
+  b(0) = 0
+  c(0) = 0
+  d(0) = 0
+  !$acc end kernels
+
+end program main
+
+! { dg-final { scan-tree-dump-times "clique 1 base 1" 4 "ealias" } }
+! { dg-final { scan-tree-dump-times "clique 1 base 2" 1 "ealias" } }
+! { dg-final { scan-tree-dump-times "clique 1 base 3" 1 "ealias" } }
+! { dg-final { scan-tree-dump-times "clique 1 base 4" 1 "ealias" } }
+! { dg-final { scan-tree-dump-times "clique 1 base 5" 1 "ealias" } }
+! { dg-final { scan-tree-dump-times "(?n)clique .* base .*" 8 "ealias" } }


Re: [PATCH] Fix combine's simplify_shift_const_1 (PR rtl-optimization/70222)

2016-03-15 Thread Jakub Jelinek
On Tue, Mar 15, 2016 at 01:08:50PM +0100, Bernd Schmidt wrote:
> This looks really specialized, and I'd be worrying about whether it really
> is the right condition. Where exactly was the constant shifted by 31 and
> count set to 0? Must be here, right?

Yes, it is that spot.
> 
>/* If we have (A << B << C) for any shift, we can convert this to
>   (A << C << B).  This wins if A is a constant.  Only try this if
>   B is not a constant.  */
> 
>else if (GET_CODE (varop) == code
> && CONST_INT_P (XEXP (varop, 0))
> && !CONST_INT_P (XEXP (varop, 1)))
> {
>   rtx new_rtx = simplify_const_binary_operation (code, mode,
> XEXP (varop, 0),
> GEN_INT (count));
>   varop = gen_rtx_fmt_ee (code, mode, new_rtx, XEXP (varop, 1));
>   count = 0;
>   continue;
> }
> 
> I think it might be clearer to notice and fix the problem here (or set a
> need_mask flag).

So do you prefer this instead?

2016-03-15  Jakub Jelinek  

PR rtl-optimization/70222
* combine.c (simplify_shift_const_1): For A >> B >> C LSHIFTRT
optimization if mode is different from result_mode, queue up masking
of the result in outer_op.  Formatting fix.

* gcc.c-torture/execute/pr70222-1.c: New test.
* gcc.c-torture/execute/pr70222-2.c: New test.

--- gcc/combine.c.jj2016-03-14 23:18:37.958408627 +0100
+++ gcc/combine.c   2016-03-15 14:08:34.754434506 +0100
@@ -10524,9 +10524,19 @@ simplify_shift_const_1 (enum rtx_code co
   && CONST_INT_P (XEXP (varop, 0))
   && !CONST_INT_P (XEXP (varop, 1)))
{
+ /* For ((unsigned) (cstULL >> count)) >> cst2 we have to make
+sure the result will be masked.  See PR70222.  */
+ if (code == LSHIFTRT
+ && mode != result_mode
+ && !merge_outer_ops (&outer_op, &outer_const, AND,
+  GET_MODE_MASK (result_mode)
+  >> orig_count, result_mode,
+  &complement_p))
+   break;
+
  rtx new_rtx = simplify_const_binary_operation (code, mode,
-XEXP (varop, 0),
-GEN_INT (count));
+XEXP (varop, 0),
+GEN_INT (count));
  varop = gen_rtx_fmt_ee (code, mode, new_rtx, XEXP (varop, 1));
  count = 0;
  continue;
--- gcc/testsuite/gcc.c-torture/execute/pr70222-1.c.jj  2016-03-15 
11:30:41.657000384 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr70222-1.c 2016-03-15 
11:30:41.657000384 +0100
@@ -0,0 +1,30 @@
+/* PR rtl-optimization/70222 */
+
+int a = 1;
+unsigned int b = 2;
+int c = 0;
+int d = 0;
+
+void
+foo ()
+{
+  int e = ((-(c >= c)) < b) > ((int) (-1ULL >> ((a / a) * 15)));
+  d = -e;
+}
+
+__attribute__((noinline, noclone)) void
+bar (int x)
+{
+  if (x != -1)
+__builtin_abort ();
+}
+
+int
+main ()
+{
+#if __CHAR_BIT__ == 8 && __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
+  foo ();
+  bar (d);
+#endif
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/pr70222-2.c.jj  2016-03-15 
11:36:13.273366841 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr70222-2.c 2016-03-15 
11:36:18.156298614 +0100
@@ -0,0 +1,20 @@
+/* PR rtl-optimization/70222 */
+
+#if __CHAR_BIT__ == 8 && __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
+__attribute__((noinline, noclone)) unsigned int
+foo (int x)
+{
+  unsigned long long y = -1ULL >> x;
+  return (unsigned int) y >> 31;
+}
+#endif
+
+int
+main ()
+{
+#if __CHAR_BIT__ == 8 && __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
+  if (foo (15) != 1 || foo (32) != 1 || foo (33) != 0)
+__builtin_abort ();
+#endif
+  return 0;
+}


Jakub


Re: [PATCH, testsuite] Fix ifcvt-4.c for PowerPC

2016-03-15 Thread David Edelsohn
On Mon, Mar 14, 2016 at 4:23 PM, Pat Haugen  wrote:
> As stated in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68232, this test
> needs -misel on powerpc to pass. Verified the following fixes the test on
> both powerpc64/powerpc64le. Ok for trunk?
>
> -Pat
>
> testsuite/ChangeLog:
> 2016-03-14  Pat Haugen  
>
> * gcc.dg/ifcvt-4.c: Add -misel for powerpc* and remove skip for
> powerpc64le.

The -misel flag will override the code generation, even if the
architecture setting doesn't support the instruction.  I guess this is
good enough for the compile-only test.

This is okay.

Thanks, David


Re: [PATCH] Fix 70199

2016-03-15 Thread Richard Biener
On Tue, Mar 15, 2016 at 4:44 AM, Richard Henderson  wrote:
> The problem here is that
>
>   void* labels[] = {
> &&l0, &&l1, &&l2
>   };
>
> gets gimplified to
>
>   labels = *.LC0;
>
> but .LC0 is not in the set of local decls, so that when copy_forbidden is
> called during sra versioning we fail to forbid the copy.  We could set a
> different flag, but I think it's easiest to just add the artificial decl to
> where it can be seen.
>
> Ok?

Hmm.  tree_output_constant_def uses the global constant pool (and not
function-scope statics).  So while for the above case with local labels
there can be no sharing and thus the decl is really "local" with non-local
labels or with other random initializers you'd have the ctor decl in
multiple local decl vectors.  Not sure if that's a problem, but at least
if you'd have

  void* labels[] = {
&&l0, &&l1, &&l2
  };
  void* labels2[] = {
&&l0, &&l1, &&l2
  };

you'll end up with the same constant pool decl in local-decls twice.  Given
cross-function sharing is generally possible doing the addition to local-decls
only if we create a new constant pool entry isn't enough either.  It's also
a bit premature in the gimplifier as we only add to local-decls during
BIND expr lowering.

I also wonder if outputting the constant pool decl far away from the labels
might end up with invalid asm for some targets.

Well, I don't see any convenient way of fixing things here either but maybe
we can do

  if (walk_tree_without_duplicates (&DECL_INITIAL (ctor),
has_label_address_in_static_1, cfun->decl))
add_local_decl (cfun, ctor);

to avoid adding the decl when it is not necessary.  Having another
struct function
flag would be possible as well, or re-use has_nonlocal_label as clearly a global
static is now referring to a local label (you'd lose optimization when
'labels' becomes
unused of course).

Thanks,
Richard.

>
> r~


[PATCH] libffi: define FFI_SIZEOF_JAVA_RAW for aarch64 ILP32

2016-03-15 Thread Andreas Schwab
Like x32, aarch64 ILP32 needs to define FFI_SIZEOF_JAVA_RAW.  This fixes
the java interpreter.

Andreas.

* src/aarch64/ffitarget.h (FFI_SIZEOF_JAVA_RAW) [__ILP32__]:
Define.
---
 libffi/src/aarch64/ffitarget.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libffi/src/aarch64/ffitarget.h b/libffi/src/aarch64/ffitarget.h
index 2862ec7..34200ad 100644
--- a/libffi/src/aarch64/ffitarget.h
+++ b/libffi/src/aarch64/ffitarget.h
@@ -29,6 +29,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
 #ifndef LIBFFI_ASM
 #ifdef __ILP32__
 #define FFI_SIZEOF_ARG 8
+#define FFI_SIZEOF_JAVA_RAW  4
 typedef unsigned long long ffi_arg;
 typedef signed long long ffi_sarg;
 #else
-- 
2.7.3

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."


Re: [RFA][PATCH][PR tree-optimization/64058] Improve and stabilize sorting of coalesce pairs

2016-03-15 Thread Richard Biener
On Mon, Mar 14, 2016 at 11:32 PM, Jeff Law  wrote:
> On 03/11/2016 03:02 AM, Richard Biener wrote:
>>
>>
>>
>> For the other part I noticed a few things
>>   1) having a bitmap_count_ior_bits () would be an improvement
>
> Yea, I almost built one.  That's easy enough to add.
>
>>   2) you might end up with redundant work(?) as you are iterating
>>   over SSA name coalesce candidates but look at partition conflicts
>
> We'd have redundant work if the elements mapped back to SSA_NAMEs which in
> turn mapped to partitions which appeared as a coalescing pair already.  But
> there's no way to know that in advance.
>
> This is mitigated somewhat in the next version which computes the conflict
> sizes lazily when the qsort comparison function is given two conflict pairs
> with an equal cost.

That sounds good.

>>   3) having this extra heuristic might be best guarded by
>> flag_expensive_optimizations
>
> Perhaps.  I don't see this tie breaker as being all that expensive.  But I
> won't object to guarding with flag_expensive_optimizations.

Yeah, we should first address the quadraticness of the live compute which is
usually what hits us first when hitting a bottleneck in coalescing / out-of-SSA.

>>   as it is a quite expensive "tie breaker" - maybe even improve things
>> by first sorting
>>   after cost and then only doing the tie breaking when necessary,
>> re-sorting the
>>   sub-sequence with same original cost.  It may also be enough to only
>> perform
>>   this for "important" candidates, say within the first 100 of the
>> function or so
>>   or with cost > X.
>
> The problem with this is qsort's interface into the comparison function has
> a terribly narrow API and I don't think we want to rely on qsort_r.  In fact
> that's the whole reason why I didn't do lazy evaluation on the conflict
> sizes initially.
>
> To work around the narrow API in the comparison function we have to either
> store additional data in each node or have them available in globals.  The
> former would be horribly wasteful, the latter is just ugly.  I choose the
> latter in the lazy evaluation of the conflicts version.

Works for me.

>>
>> And finally - if we really think that looking at the conflict size
>> increase is the way to go
>> it would maybe be better to use a fibheap updating keys in
>> attempt_coalesce
>> when we merge the conflicts.  That would also mean to work on a list
>> (fibheap)
>> of coalesces of partitions rather than SSA names.
>
> I really doubt it's worth this effort.  The literature I've been looking at
> in this space essentially says that given a reasonable coalescer,
> improvements, while measurable, are very very small in terms of the
> efficiency of the final code.
>
> Thus I rejected conservative coalescing + iteration, biased coalescing, &
> trial coalescing as too expensive given the trivial benefit. Similarly I
> rejected trying to update the costs as we coalesce partitions.  A single
> successful coalesce could have a significant ripple effect.  Perhaps that
> could be mitigated by realizing that many updates wouldn't be needed, but
> it's just a level of complexity that's not needed here.

Ok.

> And note we work on partitions, not SSA_NAMEs.  It just happens that we
> start with each SSA_NAME in its own partition.  Most SSA_NAMEs actually
> don't participate in coalescing as they're not used in a copy instruction or
> as a phi arg/result.   That's why we compact the partitions after we've
> scanned the IL for names that are going to participate in coalescing.
>
>
>
>
>
>>
>> I think the patch is reasonable enough for GCC 6 if we can bring
>> compile-time
>> cost down a bit (it can be quadratic in the number of SSA names if we have
>> a lot of coalesce candidates and nearly full conflict bitmaps - of course
>> that's
>> not a case we handle very well right now but still).  I would have hoped
>> the
>> index part of the patch fixed the regression (by luck)...
>
> I'd hoped it'd fix the regression by luck as well, but that was not the case
> :(
>
>
>>
>> As far as a testcase goes we want to scan the dumps for the actual
>> coalesces
>> being done.  Might be a bit fragile though...
>
> I suspect that's going to be quite fragile and may have more target
> dependencies than we'd like (due to branch costing and such).

Yes.

Otherwise -ENOPATCH.

Thanks,
Richard.

>
>
> Jeff


[PATCH] boehm-gc: add support for aarch64 ILP32

2016-03-15 Thread Andreas Schwab
* include/private/gcconfig.h [AARCH64] (ALIGNMENT, CPP_WORDSZ):
Define for __ILP32__.
---
 boehm-gc/include/private/gcconfig.h | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/boehm-gc/include/private/gcconfig.h 
b/boehm-gc/include/private/gcconfig.h
index 7e081d9..aa81f15 100644
--- a/boehm-gc/include/private/gcconfig.h
+++ b/boehm-gc/include/private/gcconfig.h
@@ -1854,9 +1854,14 @@
 # endif
 
 # ifdef AARCH64
-#   define CPP_WORDSZ 64
+#   ifdef __ILP32__
+# define ALIGNMENT 4
+# define CPP_WORDSZ 32
+#   else
+# define ALIGNMENT 8
+# define CPP_WORDSZ 64
+#   endif
 #   define MACH_TYPE "AARCH64"
-#   define ALIGNMENT 8
 #   ifndef HBLKSIZE
 # define HBLKSIZE 4096
 #   endif
-- 
2.7.3

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."


Re: [PATCH] Fix combine's simplify_shift_const_1 (PR rtl-optimization/70222)

2016-03-15 Thread Segher Boessenkool
On Tue, Mar 15, 2016 at 02:18:33PM +0100, Jakub Jelinek wrote:
> So do you prefer this instead?
> 
> 2016-03-15  Jakub Jelinek  
> 
>   PR rtl-optimization/70222
>   * combine.c (simplify_shift_const_1): For A >> B >> C LSHIFTRT
>   optimization if mode is different from result_mode, queue up masking
>   of the result in outer_op.  Formatting fix.
> 
>   * gcc.c-torture/execute/pr70222-1.c: New test.
>   * gcc.c-torture/execute/pr70222-2.c: New test.

This one looks fine, too (if it works ;-) )


Segher


> --- gcc/combine.c.jj  2016-03-14 23:18:37.958408627 +0100
> +++ gcc/combine.c 2016-03-15 14:08:34.754434506 +0100
> @@ -10524,9 +10524,19 @@ simplify_shift_const_1 (enum rtx_code co
>  && CONST_INT_P (XEXP (varop, 0))
>  && !CONST_INT_P (XEXP (varop, 1)))
>   {
> +   /* For ((unsigned) (cstULL >> count)) >> cst2 we have to make
> +  sure the result will be masked.  See PR70222.  */
> +   if (code == LSHIFTRT
> +   && mode != result_mode
> +   && !merge_outer_ops (&outer_op, &outer_const, AND,
> +GET_MODE_MASK (result_mode)
> +>> orig_count, result_mode,
> +&complement_p))
> + break;
> +
> rtx new_rtx = simplify_const_binary_operation (code, mode,
> -  XEXP (varop, 0),
> -  GEN_INT (count));
> +  XEXP (varop, 0),
> +  GEN_INT (count));
> varop = gen_rtx_fmt_ee (code, mode, new_rtx, XEXP (varop, 1));
> count = 0;
> continue;
> --- gcc/testsuite/gcc.c-torture/execute/pr70222-1.c.jj2016-03-15 
> 11:30:41.657000384 +0100
> +++ gcc/testsuite/gcc.c-torture/execute/pr70222-1.c   2016-03-15 
> 11:30:41.657000384 +0100
> @@ -0,0 +1,30 @@
> +/* PR rtl-optimization/70222 */
> +
> +int a = 1;
> +unsigned int b = 2;
> +int c = 0;
> +int d = 0;
> +
> +void
> +foo ()
> +{
> +  int e = ((-(c >= c)) < b) > ((int) (-1ULL >> ((a / a) * 15)));
> +  d = -e;
> +}
> +
> +__attribute__((noinline, noclone)) void
> +bar (int x)
> +{
> +  if (x != -1)
> +__builtin_abort ();
> +}
> +
> +int
> +main ()
> +{
> +#if __CHAR_BIT__ == 8 && __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
> +  foo ();
> +  bar (d);
> +#endif
> +  return 0;
> +}
> --- gcc/testsuite/gcc.c-torture/execute/pr70222-2.c.jj2016-03-15 
> 11:36:13.273366841 +0100
> +++ gcc/testsuite/gcc.c-torture/execute/pr70222-2.c   2016-03-15 
> 11:36:18.156298614 +0100
> @@ -0,0 +1,20 @@
> +/* PR rtl-optimization/70222 */
> +
> +#if __CHAR_BIT__ == 8 && __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
> +__attribute__((noinline, noclone)) unsigned int
> +foo (int x)
> +{
> +  unsigned long long y = -1ULL >> x;
> +  return (unsigned int) y >> 31;
> +}
> +#endif
> +
> +int
> +main ()
> +{
> +#if __CHAR_BIT__ == 8 && __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
> +  if (foo (15) != 1 || foo (32) != 1 || foo (33) != 0)
> +__builtin_abort ();
> +#endif
> +  return 0;
> +}


Re: [PATCH, match] Fix pr68714

2016-03-15 Thread Richard Henderson

On 03/15/2016 02:26 AM, Andreas Schwab wrote:

Richard Henderson  writes:


diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr68714.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr68714.c
new file mode 100644
index 000..741d311
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr68714.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+typedef int vec __attribute__((vector_size(16)));
+vec f(vec x,vec y){
+  return x

That fails on ia64:

$ grep " <= " pr68714.c.211t.optimized
   _10 = _8 <= _9 ? -1 : 0;
   _13 = _11 <= _12 ? -1 : 0;
   _16 = _14 <= _15 ? -1 : 0;


Ah, sure.  I should have simply tested the reassoc1 dump file, before generic 
vector lowering.



r~



Re: [PATCH, AArch64] atomics: prefetch the destination for write prior to ldxr/stxr loops

2016-03-15 Thread James Greenhalgh
On Mon, Mar 07, 2016 at 10:54:25PM -0800, Andrew Pinski wrote:
> On Mon, Mar 7, 2016 at 8:12 PM, Yangfei (Felix)  wrote:
> >> On Mon, Mar 7, 2016 at 7:27 PM, Yangfei (Felix)  
> >> wrote:
> >> > Hi,
> >> >
> >> > As discussed in LKML:
> >> http://lists.infradead.org/pipermail/linux-arm-kernel/2015-July/355996.html,
> >>  the
> >> cost of changing a cache line
> >> > from shared to exclusive state can be significant on aarch64 cores,
> >> especially when this is triggered by an exclusive store, since it may
> >> > result in having to retry the transaction.
> >> > This patch makes use of the "prfm PSTL1STRM" instruction to prefetch
> >> cache lines for write prior to ldxr/stxr loops generated by the ll/sc 
> >> atomic
> >> routines.
> >> > Bootstrapped on AArch64 server, is it OK?
> >>
> >>
> >> I don't think this is a good thing in general.  For an example on 
> >> ThunderX, the
> >> prefetch just adds a cycle for no benefit.  This really depends on the
> >> micro-architecture of the core and how LDXR/STXR are
> >> implemented.   So after this patch, it will slow down ThunderX.
> >>
> >> Thanks,
> >> Andrew Pinski
> >>
> >
> > Hi Andrew,
> >
> >I am not quite clear about the ThunderX micro-arch.  But, Yes, I agree
> >it depends on the micro-architecture of the core.  As the mentioned
> >kernel patch is merged upstream, I think the added prefetch instruction
> >in atomic routines is good for most of AArch64 cores in the market.  If
> >it does nothing good for ThunderX, then how about adding some checking
> >here?  I mean disabling the generation of the prfm if we are tuning
> >for ThunderX.
> 
> No, it is not just that it does no good; it actually causes worse
> performance for ThunderX.  How about only doing it for the
> micro-architecture where it helps and also not doing it for generic since
> it hurts ThunderX so much.

This should be a GCC 7 patch at this point, which should give us some time
to talk through whether we want this patch or not.

How bad is this for ThunderX - upthread you said one cycle penalty, but here
you suggest it hurts ThunderX more? Note that the prefetch is outside of
the LDXR/STXR loop.

Thanks,
James



Re: PING^1: [PATCH] Add TYPE_EMPTY_RECORD for C++ empty class

2016-03-15 Thread Jason Merrill
I'm concerned about how this patch changes both target-independent code 
and target-specific code, with a passing remark that other targets might 
need to make similar changes.  I'm also concerned about the effect of 
this on other languages that might not want the same change.  So, here's 
an alternative patch that implements the change in the front end (and 
includes your testcases, thanks!).


Thoughts?
commit 96d0f7ffec807b5a6b71dd2fc2f6745f441fabe0
Author: Jason Merrill 
Date:   Fri Mar 11 13:39:52 2016 -0500

	* class.c (is_really_empty_class): An unnamed bit-field doesn't
	make a class non-empty.

diff --git a/gcc/cp/class.c b/gcc/cp/class.c
index f6ad696..1027dad 100644
--- a/gcc/cp/class.c
+++ b/gcc/cp/class.c
@@ -8361,6 +8361,8 @@ is_really_empty_class (tree type)
   for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
 	if (TREE_CODE (field) == FIELD_DECL
 	&& !DECL_ARTIFICIAL (field)
+	/* An unnamed bit-field is not a data member.  */
+	&& (DECL_NAME (field) || !DECL_C_BIT_FIELD (field))
 	&& !is_really_empty_class (TREE_TYPE (field)))
 	  return false;
   return true;

commit 4a683e9e5e3b3ee824dbf86dd2ad7508ea4fdc3f
Author: Jason Merrill 
Date:   Fri Mar 11 13:40:02 2016 -0500

	Pass empty class parameters like C.

	* call.c (pass_as_empty_struct, empty_class_arg)
	(warn_empty_class_abi): New.
	(type_passed_as, build_x_va_arg): Use pass_as_empty_struct.
	(build_call_a): Use empty_class_arg, warn_empty_class_abi.
	* cp-tree.h (CPTI_EMPTY_STRUCT, empty_struct_type): New.
	* decl.c (cxx_init_decl_processing): Create empty_struct_type.
	(store_parm_decls): Use warn_empty_class_abi.

diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index 3ad3bd5..d7cfb99 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -214,6 +214,8 @@ static void add_candidates (tree, tree, const vec *, tree, tree,
 			tsubst_flags_t);
 static conversion *merge_conversion_sequences (conversion *, conversion *);
 static tree build_temp (tree, tree, int, diagnostic_t *, tsubst_flags_t);
+static bool pass_as_empty_struct (tree type);
+static tree empty_class_arg (tree);
 
 /* Returns nonzero iff the destructor name specified in NAME matches BASETYPE.
NAME can take many forms...  */
@@ -341,7 +343,6 @@ build_call_a (tree function, int n, tree *argarray)
   tree decl;
   tree result_type;
   tree fntype;
-  int i;
 
   function = build_addr_func (function, tf_warning_or_error);
 
@@ -379,16 +380,24 @@ build_call_a (tree function, int n, tree *argarray)
   /* Don't pass empty class objects by value.  This is useful
  for tags in STL, which are used to control overload resolution.
  We don't need to handle other cases of copying empty classes.  */
+  bool warned = false;
+  if (decl && !TREE_PUBLIC (decl))
+/* Don't warn about the ABI of a function local to this TU.  */
+warned = true;
   if (! decl || ! DECL_BUILT_IN (decl))
-for (i = 0; i < n; i++)
+for (int i = 0; i < n; i++)
   {
 	tree arg = CALL_EXPR_ARG (function, i);
-	if (is_empty_class (TREE_TYPE (arg))
-	&& ! TREE_ADDRESSABLE (TREE_TYPE (arg)))
+	tree type = TREE_TYPE (arg);
+	if (is_really_empty_class (type)
+	&& ! TREE_ADDRESSABLE (type))
 	  {
-	tree t = build0 (EMPTY_CLASS_EXPR, TREE_TYPE (arg));
-	arg = build2 (COMPOUND_EXPR, TREE_TYPE (t), arg, t);
-	CALL_EXPR_ARG (function, i) = arg;
+	location_t loc = EXPR_LOC_OR_LOC (arg, input_location);
+	CALL_EXPR_ARG (function, i) = empty_class_arg (arg);
+	/* Warn about ABI changes for a non-final argument.  */
+	if (!warned && i < n-1
+		&& warn_empty_class_abi (arg, loc))
+	  warned = true;
 	  }
   }
 
@@ -6871,6 +6880,14 @@ build_x_va_arg (source_location loc, tree expr, tree type)
   expr = build_va_arg (loc, expr, ref);
   return convert_from_reference (expr);
 }
+  else if (is_really_empty_class (type) && !TREE_ADDRESSABLE (type))
+{
+  /* Do the reverse of empty_class_arg.  */
+  tree etype = pass_as_empty_struct (type) ? empty_struct_type : type;
+  expr = build_va_arg (loc, expr, etype);
+  tree ec = build0 (EMPTY_CLASS_EXPR, type);
+  return build2 (COMPOUND_EXPR, type, expr, ec);
+}
 
   return build_va_arg (loc, expr, type);
 }
@@ -6967,6 +6984,65 @@ convert_default_arg (tree type, tree arg, tree fn, int parmnum,
   return arg;
 }
 
+/* Return true iff TYPE should be passed and returned as a size 0 type rather
+   than its normal size, for compatibility with C.  */
+
+static bool
+pass_as_empty_struct (tree type)
+{
+  return (abi_version_at_least (10)
+	  && type != error_mark_node
+	  && COMPLETE_TYPE_P (type)
+	  && !TREE_ADDRESSABLE (type)
+	  && is_really_empty_class (type));
+}
+
+/* Adjust the value VAL of empty class type TYPE for argument passing.
+   Keep this synced with build_x_va_arg.  */
+
+static tree
+empty_class_arg (tree val)
+{
+  /* Don't pass empty class objects by value.  This is useful
+ for tags

Re: [01/05] Fix PR 64411

2016-03-15 Thread Andrey Belevantsev

Hello,

On 14.03.2016 19:45, Bernd Schmidt wrote:

On 03/14/2016 05:23 PM, Alexander Monakov wrote:

On Mon, 14 Mar 2016, Andrey Belevantsev wrote:

In this case, we get an inconsistency between the sched-deps interface,
saying
we can't move an insn writing the si register through a vector insn, and
the
liveness analysis, saying we can.  The latter doesn't take into account
implicit_reg_pending_clobbers set calculated in sched-deps before register
allocation.  The solution is to reflect this set in our insn data
(sets/uses/clobbers).

Ok for trunk?


One nit; the prototype of the new function:

extern void get_implicit_reg_pending_clobbers (rtx_insn *, HARD_REG_SET *);

has source operand on the left, destination on the right; it's probably
nicer
to swap them around.

OK as far as selective scheduler changes go, but this also needs a general
scheduler maintainer ack for the sched-deps.c change.  Vladimir, can you
have
a look?


Needs better documentation of the new function's arguments (as per general
requirements for such things), but otherwise that part is ok (either arg
order). The sel-sched parts should also have proper function comments
however, and here:

+{
+  SET_REGNO_REG_SET (IDATA_REG_SETS (id), regno);
+}

we don't use braces around single statements.


I've incorporated both your and Alexander's comments and committed the 
patch as rev. 234216.


Andrey




Bernd




Re: [05/05] Fix PR 69102

2016-03-15 Thread Andrey Belevantsev

Hello,

On 14.03.2016 12:52, Andrey Belevantsev wrote:

Hello,

The problem here is readonly dependence contexts in selective scheduler.
We're trying to cache the effect of initializing a dependence context by
remembering that context and setting a readonly bit on it.  When first
moving the insn 43 with REG_ARGS_SIZE note through the insn 3 (a simple eax
set) we also set the last_args_size field of the context.  Later, when we
make a copy of insn 43 and try to move it again through insn 3, we take the
cached dependency context and notice the (fake) dep with last_args_size
insn, which is the old insn 43.  Then the assert saying that we should be
able to lift the bookkeeping copy up the same way as we did with the
original insn breaks.

Fixed by the attached patch that makes us notice only deps with the current
producer insn.

Ok for trunk?


We've discussed the patch with Alexander a bit and decided to take a 
different approach.  The core issue here is not handling the new 
last_args_size field in the readonly contexts.  In general, the readonly 
context approach, when analyzing an insn with a readonly context, would 
create the necessary dependencies with all of the last_ fields but refrain 
from modifying those fields.  The reason is we need to capture the effect 
of only the single producer in the readonly context.  Failing to do so may 
update the last_ fields with the effect of subsequent analyses and create 
fake dependencies with the insns that got into those fields instead of 
having the dependencies with the currently used producer.


So the right fix here is to guard setting of the last_args_size field with 
!deps->readonly test as it is done elsewhere in the sched-deps.c.  In stage 
1 we will also want to set the asserts in the sel-sched dependency hooks 
(where I have placed early returns in the previous version of the patch) 
actually checking that the dependency is always created with the current 
producer, and such cases will be caught sooner.


The new patch bootstrapped and tested on x86-64 with selective scheduler 
forced enabled with no regressions.  It needs the maintainer outside of 
sel-sched as we touch sched-deps.c file.  Ok for trunk?  The test is the 
same as in previous patch.


Andrey

2016-03-15  Andrey Belevantsev  

PR rtl-optimization/69102
* sched-deps.c (sched_analyze_insn): Do not set last_args_size field
when we have a readonly dependency context.



gcc/

2016-03-14  Andrey Belevantsev  

PR rtl-optimization/69102
* sel-sched.c (has_dependence_note_dep): Only take into
account dependencies produced by the current producer insn.
(has_dependence_note_mem_dep): Likewise.

testsuite/

2016-03-14  Andrey Belevantsev  

PR rtl-optimization/69102
* gcc.c-torture/compile/pr69102.c: New test.

Best,
Andrey



diff --git a/gcc/sched-deps.c b/gcc/sched-deps.c
index 3d4a1d5..77ffcd0 100644
--- a/gcc/sched-deps.c
+++ b/gcc/sched-deps.c
@@ -3495,7 +3495,8 @@ sched_analyze_insn (struct deps_desc *deps, rtx x, rtx_insn *insn)
 {
   if (deps->last_args_size)
 	add_dependence (insn, deps->last_args_size, REG_DEP_OUTPUT);
-  deps->last_args_size = insn;
+  if (!deps->readonly)
+	deps->last_args_size = insn;
 }
 }
 


Re: PING^1: [PATCH] Add TYPE_EMPTY_RECORD for C++ empty class

2016-03-15 Thread H.J. Lu
On Tue, Mar 15, 2016 at 8:35 AM, Jason Merrill  wrote:
> I'm concerned about how this patch changes both target-independent code and
> target-specific code, with a passing remark that other targets might need to
> make similar changes.  I'm also concerned about the effect of this on other
> languages that might not want the same change.  So, here's an alternative
> patch that implements the change in the front end (and includes your
> testcases, thanks!).
>
> Thoughts?

On x86-64, I got

libtool: compile:
/export/build/gnu/gcc-x32/build-x86_64-linux/./gcc/xgcc -shared-libgcc
-B/export/build/gnu/gcc-x32/build-x86_64-linux/./gcc -nostdinc++
-L/export/build/gnu/gcc-x32/build-x86_64-linux/x86_64-pc-linux-gnu/libstdc++-v3/src
-L/export/build/gnu/gcc-x32/build-x86_64-linux/x86_64-pc-linux-gnu/libstdc++-v3/src/.libs
-L/export/build/gnu/gcc-x32/build-x86_64-linux/x86_64-pc-linux-gnu/libstdc++-v3/libsupc++/.libs
-B/usr/gcc-6.0.0-x32/x86_64-pc-linux-gnu/bin/
-B/usr/gcc-6.0.0-x32/x86_64-pc-linux-gnu/lib/ -isystem
/usr/gcc-6.0.0-x32/x86_64-pc-linux-gnu/include -isystem
/usr/gcc-6.0.0-x32/x86_64-pc-linux-gnu/sys-include
-I/export/gnu/import/git/sources/gcc/libstdc++-v3/../libgcc
-I/export/build/gnu/gcc-x32/build-x86_64-linux/x86_64-pc-linux-gnu/libstdc++-v3/include/x86_64-pc-linux-gnu
-I/export/build/gnu/gcc-x32/build-x86_64-linux/x86_64-pc-linux-gnu/libstdc++-v3/include
-I/export/gnu/import/git/sources/gcc/libstdc++-v3/libsupc++
-std=gnu++11 -D_GLIBCXX_SHARED -fno-implicit-templates -Wall -Wextra
-Wwrite-strings -Wcast-qual -Werror=abi -Wabi=9
-fdiagnostics-show-location=once -ffunction-sections -fdata-sections
-frandom-seed=cow-shim_facets.lo -g -O2 -D_GNU_SOURCE -c
/export/gnu/import/git/sources/gcc/libstdc++-v3/src/c++11/cow-shim_facets.cc
 -fPIC -DPIC -D_GLIBCXX_SHARED -o cow-shim_facets.o
In file included from
/export/gnu/import/git/sources/gcc/libstdc++-v3/src/c++11/cow-shim_facets.cc:35:0:
/export/gnu/import/git/sources/gcc/libstdc++-v3/src/c++11/cxx11-shim_facets.cc:
In instantiation of
‘std::__facet_shims::{anonymous}::numpunct_shim<_CharT>::numpunct_shim(const
facet*, std::__facet_shims::{anonymous}::numpunct_shim<_CharT>::__cache_type*)
[with _CharT = char; std::__facet_shims::facet = std::locale::facet;
std::__facet_shims::{anonymous}::numpunct_shim<_CharT>::__cache_type =
std::__numpunct_cache]’:
/export/gnu/import/git/sources/gcc/libstdc++-v3/src/c++11/cxx11-shim_facets.cc:461:20:
  required from here
/export/gnu/import/git/sources/gcc/libstdc++-v3/src/c++11/cxx11-shim_facets.cc:238:25:
error: empty class ‘std::__facet_shims::other_abi {aka
std::integral_constant}’ parameter passing ABI changes in
-fabi-version=10 (GCC 6) [-Werror=abi]
__numpunct_fill_cache(other_abi{}, f, c);
~^~~
/export/gnu/import/git/sources/gcc/libstdc++-v3/src/c++11/cxx11-shim_facets.cc:
In instantiation of ‘int
std::__facet_shims::{anonymous}::collate_shim<_CharT>::do_compare(const
_CharT*, const _CharT*, const _CharT*, const _CharT*) const [with
_CharT = char]’:
/export/gnu/import/git/sources/gcc/libstdc++-v3/src/c++11/cxx11-shim_facets.cc:462:20:
  required from here
/export/gnu/import/git/sources/gcc/libstdc++-v3/src/c++11/cxx11-shim_facets.cc:265:28:
error: empty class ‘std::__facet_shims::other_abi {aka
std::integral_constant}’ parameter passing ABI changes in
-fabi-version=10 (GCC 6) [-Werror=abi]
return __collate_compare(other_abi{}, _M_get(),
   ~^~~
lo1, hi1, lo2, hi2);
~~~
/export/gnu/import/git/sources/gcc/libstdc++-v3/src/c++11/cxx11-shim_facets.cc:
In instantiation of
‘std::__facet_shims::{anonymous}::collate_shim<_CharT>::string_type
std::__facet_shims::{anonymous}::collate_shim<_CharT>::do_transform(const
_CharT*, const _CharT*) const [with _CharT = char;
std::__facet_shims::{anonymous}::collate_shim<_CharT>::string_type =
std::basic_string]’:
/export/gnu/import/git/sources/gcc/libstdc++-v3/src/c++11/cxx11-shim_facets.cc:462:20:
  required from here
/export/gnu/import/git/sources/gcc/libstdc++-v3/src/c++11/cxx11-shim_facets.cc:273:23:
error: empty class ‘std::__facet_shims::other_abi {aka
std::integral_constant}’ parameter passing ABI changes in
-fabi-version=10 (GCC 6) [-Werror=abi]
__collate_transform(other_abi{}, _M_get(), st, lo, hi);
~~~^~~
/export/gnu/import/git/sources/gcc/libstdc++-v3/src/c++11/cxx11-shim_facets.cc:
In instantiation of
‘std::__facet_shims::{anonymous}::moneypunct_shim<_CharT,
_Intl>::moneypunct_shim(const facet*,
std::__facet_shims::{anonymous}::moneypunct_shim<_CharT,
_Intl>::__cache_type*) [with _CharT = char; bool _Intl = true;
std::__facet_shims::facet = std::locale::facet;
std::__facet_shims::{anonymous}::moneypunct_shim<_CharT,
_Intl>::__cache_type = std::__moneypunct_cache]’:
...

-- 
H.J.


Re: [PATCH] Fix combine's simplify_shift_const_1 (PR rtl-optimization/70222)

2016-03-15 Thread Jakub Jelinek
On Tue, Mar 15, 2016 at 09:50:44AM -0500, Segher Boessenkool wrote:
> On Tue, Mar 15, 2016 at 02:18:33PM +0100, Jakub Jelinek wrote:
> > So do you prefer this instead?
> > 
> > 2016-03-15  Jakub Jelinek  
> > 
> > PR rtl-optimization/70222
> > * combine.c (simplify_shift_const_1): For A >> B >> C LSHIFTRT
> > optimization if mode is different from result_mode, queue up masking
> > of the result in outer_op.  Formatting fix.
> > 
> > * gcc.c-torture/execute/pr70222-1.c: New test.
> > * gcc.c-torture/execute/pr70222-2.c: New test.
> 
> This one looks fine, too (if it works ;-) )

Passed bootstrap/regtest on x86_64-linux and i686-linux, and triggers solely
on the new testcases, nothing else.  Committed now.

Jakub


[committed] Fix sccvn VN_GET_INFO bug (PR middle-end/70239)

2016-03-15 Thread Jakub Jelinek
Hi!

As mentioned in the PR, the problem is that we might grow the vector
without clearing the new entries; while we overwrite the last entry, there might
be gaps containing garbage pointers, which cause lots of weird issues.

The testcase is unfortunately too large (creduced it down to 26100 bytes)
for the testsuite.  Bootstrapped/regtested on x86_64-linux and i686-linux,
committed to trunk as obvious.

2016-03-15  Jakub Jelinek  

PR middle-end/70239
* tree-ssa-sccvn.c (VN_INFO_GET): Use safe_grow_cleared instead
of safe_grow.

--- gcc/tree-ssa-sccvn.c.jj 2016-02-16 16:14:43.0 +0100
+++ gcc/tree-ssa-sccvn.c2016-03-15 13:02:40.876997092 +0100
@@ -407,7 +407,7 @@ VN_INFO_GET (tree name)
   newinfo = XOBNEW (&vn_ssa_aux_obstack, struct vn_ssa_aux);
   memset (newinfo, 0, sizeof (struct vn_ssa_aux));
   if (SSA_NAME_VERSION (name) >= vn_ssa_aux_table.length ())
-vn_ssa_aux_table.safe_grow (SSA_NAME_VERSION (name) + 1);
+vn_ssa_aux_table.safe_grow_cleared (SSA_NAME_VERSION (name) + 1);
   vn_ssa_aux_table[SSA_NAME_VERSION (name)] = newinfo;
   return newinfo;
 }

Jakub


Re: [Patch, avr]Fix multiple ICE fallout of PR 69764

2016-03-15 Thread Denis Chertykov
2016-03-10 16:46 GMT+03:00 Senthil Kumar Selvaraj
:
> Hi,
>
>This patch fixes ~230 internal compiler errors that showed up after
>the fix for PR 69764. After the patch, target backends need to
>explicitly specify mode for operand 2 of shift and rotate patterns -
>see md.texi mod at
>
> https://gcc.gnu.org/viewcvs/gcc/trunk/gcc/doc/md.texi?r1=233358&r2=233613&pathrev=233613.
>
>The avr backend had VOIDmode as the mode for rotl SPN, and this patch
>sets it to the mode of the insn to fix the problem.
>
>Reg testing shows ICE fixes and no new failures.
>
>If this is ok, could someone commit please? I don't have commit
>access.
>
> Regards
> Senthil
>
> 2016-03-10  Senthil Kumar Selvaraj  
>
> * config/avr/avr.md (rotl3): Set
> mode for operand 2.

Committed.

Denis.


Re: [C++ PATCH] Fix -fsanitize=vptr (PR c++/70147)

2016-03-15 Thread Jakub Jelinek
Hi!

Bernd E. mentioned in the PR the problem that if some subobject ctor throws,
if for -fsanitize=vptr we clear again the vtable pointers even for virtual
bases then they won't be properly destructed.

So, here is an incremental patch to the earlier patch, which will clear
the virtual base vtbl pointers only in the in-charge ctor.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Though, this brings a non-sanitizer issue, for -flifetime-dse=2
we emit a clobber of the whole subobject even in a ctor with _vtt_parm
argument, and the virtual bases at that point might live inside of the
area that is clobbered by the ctor {CLOBBER}
(both data and vtable pointers).  I'm afraid that e.g. with inlining this
could result in wrong code, e.g. DSE removing the earlier stores from the
virtual base ctor stores, because we {CLOBBER} them later on.
Shouldn't we conditionalize the -flifetime-dse=2 clobbers on __in_chrg
(if that parm is present)?

2016-03-15  Jakub Jelinek  

PR c++/70147
* cp-ubsan.c (cp_ubsan_dfs_initialize_vtbl_ptrs): Conditionalize
BINFO_VIRTUAL_P vtable clearing on current_in_charge_parm.

* g++.dg/ubsan/pr70147-2.C (C::C): Initialize A base with invalid
method call to i () as argument.  Adjust expected output.

--- gcc/cp/cp-ubsan.c.jj2016-03-15 09:25:19.0 +0100
+++ gcc/cp/cp-ubsan.c   2016-03-15 09:40:51.209005916 +0100
@@ -299,8 +299,14 @@ cp_ubsan_dfs_initialize_vtbl_ptrs (tree
 
   /* Assign NULL to the vptr.  */
   tree vtbl = build_zero_cst (TREE_TYPE (vtbl_ptr));
-  finish_expr_stmt (cp_build_modify_expr (vtbl_ptr, NOP_EXPR, vtbl,
- tf_warning_or_error));
+  tree stmt = cp_build_modify_expr (vtbl_ptr, NOP_EXPR, vtbl,
+   tf_warning_or_error);
+  if (BINFO_VIRTUAL_P (binfo))
+   stmt = build3 (COND_EXPR, void_type_node,
+  build2 (NE_EXPR, boolean_type_node,
+  current_in_charge_parm, integer_zero_node),
+  stmt, void_node);
+  finish_expr_stmt (stmt);
 }
 
   return NULL_TREE;
--- gcc/testsuite/g++.dg/ubsan/pr70147-2.C.jj   2016-03-15 09:46:24.0 
+0100
+++ gcc/testsuite/g++.dg/ubsan/pr70147-2.C  2016-03-15 09:48:04.622051166 
+0100
@@ -46,7 +46,7 @@ struct B : virtual A, public E, public F
 };
 struct C : B, virtual A
 {
-  C () {}
+  C () : A (i ()) {}
 };
 
 int
@@ -55,28 +55,22 @@ main ()
   C c;
 }
 
-// { dg-output "\[^\n\r]*pr70147-2.C:33:\[0-9]*: runtime error: member call on 
address 0x\[0-9a-fA-F]* which does not point to an object of type 
'E'(\n|\r\n|\r)" }
-// { dg-output "0x\[0-9a-fA-F]*: note: object has invalid vptr(\n|\r\n|\r)" }
-// { dg-output "  ?.. .. .. ..  ?.. .. .. ..  ?.. .. .. .. 
\[^\n\r]*(\n|\r\n|\r)" }
-// { dg-output "  ?\\^~~\[^\n\r]*(\n|\r\n|\r)" }
-// { dg-output "  ?invalid vptr(\n|\r\n|\r)" }
-// { dg-output "\[^\n\r]*pr70147-2.C:34:\[0-9]*: runtime error: member call on 
address 0x\[0-9a-fA-F]* which does not point to an object of type 
'F'(\n|\r\n|\r)" }
+// { dg-output "\[^\n\r]*pr70147-2.C:49:\[0-9]*: runtime error: member call on 
address 0x\[0-9a-fA-F]* which does not point to an object of type 
'A'(\n|\r\n|\r)" }
 // { dg-output "0x\[0-9a-fA-F]*: note: object has invalid vptr(\n|\r\n|\r)" }
 // { dg-output "  ?.. .. .. ..  ?.. .. .. ..  ?.. .. .. .. 
\[^\n\r]*(\n|\r\n|\r)" }
 // { dg-output "  ?\\^~~\[^\n\r]*(\n|\r\n|\r)" }
 // { dg-output "  ?invalid vptr\[^\n\r]*(\n|\r\n|\r)" }
-// { dg-output "\[^\n\r]*pr70147-2.C:35:\[0-9]*: runtime error: member call on 
address 0x\[0-9a-fA-F]* which does not point to an object of type 
'A'(\n|\r\n|\r)" }
+// { dg-output "\[^\n\r]*pr70147-2.C:33:\[0-9]*: runtime error: member call on 
address 0x\[0-9a-fA-F]* which does not point to an object of type 
'E'(\n|\r\n|\r)" }
 // { dg-output "0x\[0-9a-fA-F]*: note: object has invalid vptr(\n|\r\n|\r)" }
 // { dg-output "  ?.. .. .. ..  ?.. .. .. ..  ?.. .. .. .. 
\[^\n\r]*(\n|\r\n|\r)" }
 // { dg-output "  ?\\^~~\[^\n\r]*(\n|\r\n|\r)" }
-// { dg-output "  ?invalid vptr\[^\n\r]*(\n|\r\n|\r)" }
-// Note we don't catch the UB of calling g () on line 36.
-// { dg-output "\[^\n\r]*pr70147-2.C:38:\[0-9]*: runtime error: member call on 
address 0x\[0-9a-fA-F]* which does not point to an object of type 
'F'(\n|\r\n|\r)" }
+// { dg-output "  ?invalid vptr(\n|\r\n|\r)" }
+// { dg-output "\[^\n\r]*pr70147-2.C:34:\[0-9]*: runtime error: member call on 
address 0x\[0-9a-fA-F]* which does not point to an object of type 
'F'(\n|\r\n|\r)" }
 // { dg-output "0x\[0-9a-fA-F]*: note: object has invalid vptr(\n|\r\n|\r)" }
 // { dg-output "  ?.. .. .. ..  ?.. .. .. ..  ?.. .. .. .. 
\[^\n\r]*(\n|\r\n|\r)" }
 // { dg-output "  ?\\^~~\[^\n\r]*(\n|\r\n|\r)" }
 // { dg-output "  ?invalid vptr\

Re: [PATCH][SPARC] sparc: switch -fasynchronous-unwind-tables on by default.

2016-03-15 Thread Jose E. Marchesi

> Consider the attached test program.  When built with -g in sparc64-*-*
> the resulting binary contains:
> 
> - A .eh_frame segment containing CFA information for __libc_csu_init and
>   __libc_csu_fini.
> 
> - A .debug_frame segment containing CFA information for func2, func1 and
>   main.
> 
> The backtrace(3) implementation for sparc contains a simple unwinder
> that works well in most cases, but that unwinder is not used if
> libgcc_s.so can be dlopened and it provides _Unwind_Backtrace.  Now,
> _Unwind_Backtrace uses .eh_frame but not .debug_frame.  Thus,
> backtrace(3) is only useful in programs built with
> -fasynchronous-unwind-tables even if -g provides CFA info in
> .debug_frame.

How does that work for e.g. PowerPC or MIPS?  Why not do the same for SPARC?

The glibc PowerPC port doesn't use the libgcc_s unwinder to implement
backtrace().  It has a little ad-hoc unwinder.

MIPS is like x86_64: it exclusively relies on libgcc_s _Unwind_Backtrace
to unwind the stack.

As far as I can tell, -fasynchronous-unwind-tables is disabled in MIPS
by default.  Therefore unless -fasynchronous-unwind-tables is used at
build time backtrace() probably has the same problem as sparc (can't
tell for sure, as I don't have access to any mips host where to test).



C++ PATCH for c++/70209 (ICE in strip_typedefs)

2016-03-15 Thread Marek Polacek
Recently, strip_typedefs was updated to use DECL_ORIGINAL_TYPE on typedefs
because TYPE_MAIN_VARIANT wasn't sufficient in getting the underlying type.
In this case even that wasn't enough: for the attached test DECL_ORIGINAL_TYPE
of Ta [REAL_TYPE] yielded F [REAL_TYPE], but we want to get down to "float"
here.  Just calling strip_typedefs again helps (essentially we need to use
DECL_ORIGINAL_TYPE again).

Bootstrapped/regtested on x86_64-linux, ok for trunk and 5 (after say 3 days)?

2016-03-15  Marek Polacek  

PR c++/70209
* tree.c (strip_typedefs): Call strip_typedefs again on the
DECL_ORIGINAL_TYPE result.

* g++.dg/ext/attribute-may-alias-4.C: New test.

diff --git gcc/cp/tree.c gcc/cp/tree.c
index aaf9a4f..f784952 100644
--- gcc/cp/tree.c
+++ gcc/cp/tree.c
@@ -1460,9 +1460,12 @@ strip_typedefs (tree t, bool *remove_attributes)
   if (!result)
 {
   if (typedef_variant_p (t))
-   /* Explicitly get the underlying type, as TYPE_MAIN_VARIANT doesn't
-  strip typedefs with attributes.  */
-   result = TYPE_MAIN_VARIANT (DECL_ORIGINAL_TYPE (TYPE_NAME (t)));
+   {
+ /* Explicitly get the underlying type, as TYPE_MAIN_VARIANT doesn't
+strip typedefs with attributes.  */
+ result = TYPE_MAIN_VARIANT (DECL_ORIGINAL_TYPE (TYPE_NAME (t)));
+ result = strip_typedefs (result);
+   }
   else
result = TYPE_MAIN_VARIANT (t);
 }
diff --git gcc/testsuite/g++.dg/ext/attribute-may-alias-4.C 
gcc/testsuite/g++.dg/ext/attribute-may-alias-4.C
index e69de29..a459d49 100644
--- gcc/testsuite/g++.dg/ext/attribute-may-alias-4.C
+++ gcc/testsuite/g++.dg/ext/attribute-may-alias-4.C
@@ -0,0 +1,17 @@
+// PR c++/70209
+
+struct V {
+  typedef float F;
+  template  void m_fn1(S);
+};
+
+template  struct A {
+  typedef V::F Ta __attribute__((__may_alias__));
+  Ta *m_data;
+  void m_fn2(V &);
+};
+
+template <>
+void A::m_fn2(V &p) {
+  p.m_fn1(m_data);
+}

Marek


Re: [02/05] Fix PR 63384

2016-03-15 Thread Marek Polacek
On Mon, Mar 14, 2016 at 12:31:24PM +0300, Andrey Belevantsev wrote:
> Hello,
> 
> Here we're looping because we decrease the counter of the insns we still can
> issue on a DEBUG_INSN thus rendering the counter negative.  The fix is to
> not count debug insns in the corresponding code.  The selective scheduling
> is known to spoil the result of var tracking, but still it is not the reason
> to hang in there.
> 
> The toggle option used in the test seems to be the equivalent of just
> enabling var-tracking-assignments which should lead to the same situation;
> however, if specified as is, var-tracking-assignments will be disabled by
> the toplev.c:1460 code.  Maybe we also need the same treatment for
> flag_var_tracking_assignments_toggle.
> 
> Ok for trunk?
> 
> gcc/
> 
> 2016-03-14  Andrey Belevantsev  
> 
> PR rtl-optimization/63384
> * sel-sched.c (invoke_aftermath_hooks): Do not decrease issue_more on
> DEBUG_INSN_P insns.
> 
> testsuite/
> 
> 2016-03-14  Andrey Belevantsev  
> 
> PR rtl-optimization/63384
> * testsuite/g++.dg/pr63384.C: New test.
> 
> Best,
> Andrey
> 

> diff --git a/gcc/sel-sched.c b/gcc/sel-sched.c
> index c798935..893a3e5 100644
> --- a/gcc/sel-sched.c
> +++ b/gcc/sel-sched.c
> @@ -4249,7 +4249,8 @@ invoke_aftermath_hooks (fence_t fence, rtx_insn 
> *best_insn, int issue_more)
>issue_more);
>memcpy (FENCE_STATE (fence), curr_state, dfa_state_size);
>  }
> -  else if (GET_CODE (PATTERN (best_insn)) != USE
> +  else if (! DEBUG_INSN_P (best_insn)
> +&& GET_CODE (PATTERN (best_insn)) != USE
> && GET_CODE (PATTERN (best_insn)) != CLOBBER)
>  issue_more--;
>  
> diff --git a/gcc/testsuite/g++.dg/pr63384.C b/gcc/testsuite/g++.dg/pr63384.C
> new file mode 100644
> index 000..b4e0784
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/pr63384.C
> @@ -0,0 +1,12 @@
> +/* { dg-do compile { target powerpc*-*-* ia64-*-* i?86-*-* x86_64-*-* } } */
> +/* { dg-options "-O2 -fselective-scheduling2 -fsel-sched-pipelining  
> -fsel-sched-pipelining-outer-loops -fsel-sched-reschedule-pipelined 
> -fvar-tracking-assignments-toggle -ftree-vectorize" } */
> +
> +template  T **make_test_matrix() {
> + T **data = new T *;
> + for (int i = 0; i < 1000; i++)
> +;
> +}
> +
> +template  void test() { T **c = make_test_matrix(); }
> +
> +main() { test(); }

This test fails for me due to
cc1plus: warning: var-tracking-assignments changes selective scheduling

Marek


Re: [02/05] Fix PR 63384

2016-03-15 Thread Alexander Monakov
On Tue, 15 Mar 2016, Marek Polacek wrote:
> This test fails for me due to
> cc1plus: warning: var-tracking-assignments changes selective scheduling

Thanks for the heads-up Marek, and sorry for the trouble.  Like I said in the
adjacent reply, the warning is expected (I didn't realize the testsuite would
notice that, though).  I think the right fix is to simply add "-w" to
dg-options, and while we are at it, we should probably change -fvta-toggle to
just -fvta as well (because VTA is active either way, right?).

Andrey?

Thanks.
Alexander


Re: [02/05] Fix PR 63384

2016-03-15 Thread Andrey Belevantsev

On 15.03.2016 20:44, Alexander Monakov wrote:

On Tue, 15 Mar 2016, Marek Polacek wrote:

This test fails for me due to
cc1plus: warning: var-tracking-assignments changes selective scheduling


Thanks for the heads-up Marek, and sorry for the trouble.  Like I said in the
adjacent reply, the warning is expected (I didn't realize the testsuite would
notice that, though).  I think the right fix is to simply add "-w" to
dg-options, and while we are at it, we should probably change -fvta-toggle to
just -fvta as well (because VTA is active either way, right?).


Yes, the -fvta should work.  Sorry for the breakage, I guess I've misread 
the compare-tests output when also checking the run with forced sel-sched 
enabled.


I can take care of the test tomorrow morning or you can do it now.

Best,
Andrey



Andrey?

Thanks.
Alexander





Re: [PATCH] extend.texi: Expand on the perils of using the 'leaf' attribute.

2016-03-15 Thread Carlos O'Donell
On 03/14/2016 06:15 PM, Sandra Loosemore wrote:
> On 03/14/2016 12:40 PM, Carlos O'Donell wrote:
>> Using the 'leaf' attribute is difficult in certain use cases, and
>> the documentation rightly points out that signals is one such
>> problem.
>> 
>> We should additionally document the following caveats:
>> 
>> * Indirect function resolvers (thanks to Florian Weimer for
>> catching this). * Indirect function implementations * ELF symbol
>> interposition.
>> 
>> [snip]
>> 
>> gcc/ 2016-03-14  Carlos O'Donell  
>> 
>> * doc/extend.texi (Common Function Attributes): Describe ifunc
>> impact on leaf attribute.
>> 
> 
> H.  Both your patch and the original text really need some
> copy-editing to fix noun/verb agreement, punctuation, etc.  How about
> something like the attached patch?  I just threw this together and
> haven't tested this in any way, but if you confirm that it builds and it
> looks OK to you, feel free to check it in.

Hey Sandra! :-)

Testing right now. I like your text better. I'll commit once I make
sure I haven't made a mistake in the formatting.

-- 
Cheers,
Carlos.


Re: [02/05] Fix PR 63384

2016-03-15 Thread Alexander Monakov
On Tue, 15 Mar 2016, Andrey Belevantsev wrote:
> On 15.03.2016 20:44, Alexander Monakov wrote:
> > On Tue, 15 Mar 2016, Marek Polacek wrote:
> > > This test fails for me due to
> > > cc1plus: warning: var-tracking-assignments changes selective scheduling
> >
> > Thanks for the heads-up Marek, and sorry for the trouble.  Like I said in
> > the adjacent reply, the warning is expected (I didn't realize the
> > testsuite would notice that, though).  I think the right fix is to simply
> > add "-w" to dg-options, and while we are at it, we should probably change
> > -fvta-toggle to just -fvta as well (because VTA is active either way,
> > right?).
> 
> Yes, the -fvta should work.  Sorry for the breakage, I guess I've misread the
> compare-tests output when also checking the run with forced sel-sched enabled.
> 
> I can take care of the test tomorrow morning or you can do it now.

Thanks for confirming — committed rev. 234227.

Alexander

C++ PATCH for c++/70141 (wrong partial specialization error)

2016-03-15 Thread Jason Merrill
The code in for_each_template_parm_r to walk into the RHS of 
TYPENAME_TYPE only when there isn't a predicate seemed nonsensical, and 
removing the condition didn't break anything in the testsuite.


Tested x86_64-pc-linux-gnu, applying to trunk.
commit 364983121fbd1a0c79185a5d0291bcef72970613
Author: Jason Merrill 
Date:   Tue Mar 15 12:13:03 2016 -0400

	PR c++/70141
	* pt.c (for_each_template_parm_r): Always walk into TYPENAME_TYPE.

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 978..724d6e9 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -8851,8 +8851,9 @@ for_each_template_parm_r (tree *tp, int *walk_subtrees, void *d)
   break;
 
 case TYPENAME_TYPE:
-  if (!fn)
-	WALK_SUBTREE (TYPENAME_TYPE_FULLNAME (t));
+  /* A template-id in a TYPENAME_TYPE might be a deduced context after
+	 partial instantiation.  */
+  WALK_SUBTREE (TYPENAME_TYPE_FULLNAME (t));
   break;
 
 case CONSTRUCTOR:
diff --git a/gcc/testsuite/g++.dg/template/partial-specialization4.C b/gcc/testsuite/g++.dg/template/partial-specialization4.C
new file mode 100644
index 000..1f2aced
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/partial-specialization4.C
@@ -0,0 +1,26 @@
+// PR c++/70141
+
+template 
+struct outer
+{
+  template 
+  struct inner
+  {
+
+  };
+};
+
+
+template 
+struct is_inner_for
+{
+  template 
+  struct predicate;
+
+  template 
+  struct predicate::template inner >
+  {
+  };
+};
+
+is_inner_for::predicate::inner > p;


Re: PING^1: [PATCH] Add TYPE_EMPTY_RECORD for C++ empty class

2016-03-15 Thread Jason Merrill

On 03/15/2016 12:00 PM, H.J. Lu wrote:

On Tue, Mar 15, 2016 at 8:35 AM, Jason Merrill  wrote:

I'm concerned about how this patch changes both target-independent code and
target-specific code, with a passing remark that other targets might need to
make similar changes.  I'm also concerned about the effect of this on other
languages that might not want the same change.  So, here's an alternative
patch that implements the change in the front end (and includes your
testcases, thanks!).

Thoughts?


On x86-64, I got

/export/gnu/import/git/sources/gcc/libstdc++-v3/src/c++11/cxx11-shim_facets.cc:273:23:
error: empty class ‘std::__facet_shims::other_abi {aka
std::integral_constant}’ parameter passing ABI changes in
-fabi-version=10 (GCC 6) [-Werror=abi]
 __collate_transform(other_abi{}, _M_get(), st, lo, hi);


Right, need to remove the -Werror=abi bit from the patch until Jonathan 
updates libstdc++.


Jason



Re: C++ PATCH to fix missing warning (PR c++/70194)

2016-03-15 Thread Jason Merrill

Let's factor out that duplicated code into a separate function.

Jason


Re: [PATCH] Fix 70199

2016-03-15 Thread Richard Henderson

On 03/15/2016 07:13 AM, Richard Biener wrote:

On Tue, Mar 15, 2016 at 4:44 AM, Richard Henderson  wrote:

The problem here is that

   void* labels[] = {
 &&l0, &&l1, &&l2
   };

gets gimplified to

   labels = *.LC0;

but .LC0 is not in the set of local decls, so that when copy_forbidden is
called during sra versioning we fail to forbid the copy.  We could set a
different flag, but I think it's easiest to just add the artificial decl to
where it can be seen.

Ok?


Hmm.  tree_output_constant_def uses the global constant pool (and not
function-scope statics).  So while for the above case with local labels
there can be no sharing and thus the decl is really "local" with non-local
labels or with other random initializers you'd have the ctor decl in
multiple local decl vectors.  Not sure if that's a problem, but at least
if you'd have

   void* labels[] = {
 &&l0, &&l1, &&l2
   };
   void* labels2[] = {
 &&l0, &&l1, &&l2
   };

you'll end up with the same constant pool decl in local-decls twice.


Yeah, but since the decl is TREE_STATIC, we'll ignore it for almost everything. 
 About the only thing I can figure that might go wrong is unused variable 
removal, where we'd remove the first copy but not look for duplicates, and so 
the variable stays in use when it isn't.  I don't *think* that can cause 
further problems.  It's not like we ever clear FORCED_LABEL even if the data 
referencing it goes away.



It's also
a bit premature in the gimplifier as we only add to local-decls during
BIND expr lowering.


Yeah, I suppose.  Though for a TREE_STATIC decl it doesn't make a difference 
that we didn't put it into any BIND_EXPR.



I also wonder if outputting the constant pool decl far away from the labels
might end up with invalid asm for some targets.


No.  The pointers involved here are full address space, not reduced 
displacement pc-relative.



Well, I don't see any convenient way of fixing things here either but maybe
we can do

   if (walk_tree_without_duplicates (&DECL_INITIAL (ctor),
has_label_address_in_static_1, cfun->decl))
 add_local_decl (cfun, ctor);

to avoid adding the decl when it is not necessary.


Sure.  Patch 1 below.


Having another struct function flag would be possible as well, or re-use
has_nonlocal_label as clearly a global static is now referring to a local
label (you'd lose optimization when 'labels' becomes unused of course).


On the other hand, the likelihood of these labels (or the data referencing the 
labels) going away is slim.  Except for artificial test cases, the user is 
going to have taken these addresses and put them in an array for a reason.  The 
likelihood of some stored FORCED_LABEL becoming non-forced is virtually nil.


Patch 2 below.  This second patch does have lower complexity, and doesn't have 
the duplicated entry issue you point out.


Thoughts?


r~
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index b331e41..cf50271 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -4016,6 +4016,14 @@ gimplify_init_constructor (tree *expr_p, gimple_seq 
*pre_p, gimple_seq *post_p,
 
walk_tree (&ctor, force_labels_r, NULL, NULL);
ctor = tree_output_constant_def (ctor);
+
+   /* If the ctor has a label in it, we need to remember the
+  decl so that copy_forbidden can find it.  But for anything
+  else we don't want to place the global variable on the
+  local decls list.  */
+   if (has_label_address_in_static (ctor, cfun->decl))
+ add_local_decl (cfun, ctor);
+
if (!useless_type_conversion_p (type, TREE_TYPE (ctor)))
  ctor = build1 (VIEW_CONVERT_EXPR, type, ctor);
TREE_OPERAND (*expr_p, 1) = ctor;
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr70199.c 
b/gcc/testsuite/gcc.c-torture/compile/pr70199.c
new file mode 100644
index 000..a4323f0
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr70199.c
@@ -0,0 +1,20 @@
+static volatile int v = 0;
+static
+void benchmark(long runs) {
+  void* labels[] = {
+&&l0, &&l1, &&l2
+  };
+  for(unsigned int mask = 0x1F; mask > 0; mask >>= 1) {
+unsigned lfsr = 0xACE1u;
+long n = 1000;
+while(n > 0) {
+  l2: v;
+  l1: v;
+  goto *labels[lfsr & mask];
+  l0: n--;
+}
+  }
+}
+int f(void) {
+  benchmark(1000);
+}
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index d52e0c6..cac2340 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -3522,6 +3522,17 @@ has_label_address_in_static_1 (tree *nodep, int 
*walk_subtrees, void *fnp)
   return NULL_TREE;
 }
 
+/* Determine if the DECL_INITIAL of DECL makes a reference to a label that
+   is local to FNDECL.  */
+
+bool
+has_label_address_in_static (tree decl, tree fndecl)
+{
+  return walk_tree_without_duplicates (&DECL_INITIAL (decl),
+  has_label_address_in_static_1,
+  fnde

Re: C++ PATCH for c++/70209 (ICE in strip_typedefs)

2016-03-15 Thread Jason Merrill

OK.

Jason


Re: PING^1: [PATCH] Add TYPE_EMPTY_RECORD for C++ empty class

2016-03-15 Thread Joseph Myers
I'm not sure if the zero-size arrays (a GNU extension) are considered to 
make a struct non-empty, but in any case I think the tests should cover 
such arrays as elements of structs.

-- 
Joseph S. Myers
jos...@codesourcery.com


[PATCH] Fix compiling large files

2016-03-15 Thread Richard Henderson

On 03/10/2016 08:20 PM, DJ Delorie wrote:

I'm moving on to Plan C but I put a copy of the file on
.../dj/foo.c.gz (195Mb) if anyone wants to find out
why there's a 16Gb limit compiling it...


With just the following, we successfully compile your file.

It takes about 25 minutes and memory use tops out around 40GB.
Which still seems insane for a 1.6GB input file consisting
primarily of data for a static array, but that's a
different problem.

At this point we usually have a PR to go with all stage4
changes.  But a meaningful PR is difficult to create, since
the attachment would be too large.  Perhaps a generator could
be created, but since it wouldn't go in the testsuite it seems
like a waste of time.

Thoughts?


r~


* line-map.c (new_linemap): Make alloc_size a size_t.

diff --git a/libcpp/line-map.c b/libcpp/line-map.c
index 1fb634a..80d4e6b 100644
--- a/libcpp/line-map.c
+++ b/libcpp/line-map.c
@@ -376,7 +376,7 @@ new_linemap (struct line_maps *set,
   if (LINEMAPS_USED (set, macro_map_p) == LINEMAPS_ALLOCATED (set, 
macro_map_p))
 {
   /* We ran out of allocated line maps. Let's allocate more.  */
-  unsigned alloc_size;
+  size_t alloc_size;

   /* Cast away extern "C" from the type of xrealloc.  */
   line_map_realloc reallocator = (set->reallocator



Re: PING^1: [PATCH] Add TYPE_EMPTY_RECORD for C++ empty class

2016-03-15 Thread H.J. Lu
On Tue, Mar 15, 2016 at 2:39 PM, Joseph Myers  wrote:
> I'm not sure if the zero-size arrays (a GNU extension) are considered to
> make a struct non-empty, but in any case I think the tests should cover
> such arrays as elements of structs.

There are a couple of tests for structs with members of array
of empty types.  testsuite/g++.dg/abi/empty14.h has

struct dummy0 { };
struct dummy { struct dummy0 d[140]; };

-- 
H.J.


Re: PING^1: [PATCH] Add TYPE_EMPTY_RECORD for C++ empty class

2016-03-15 Thread Joseph Myers
On Tue, 15 Mar 2016, H.J. Lu wrote:

> On Tue, Mar 15, 2016 at 2:39 PM, Joseph Myers  wrote:
> > I'm not sure if the zero-size arrays (a GNU extension) are considered to
> > make a struct non-empty, but in any case I think the tests should cover
> > such arrays as elements of structs.
> 
> There are couple tests for structs with members of array
> of empty types.  testsuite/g++.dg/abi/empty14.h has

My concern is the other way round - structs with elements such as 
"int a[0];", an array [0] of a nonempty type.  My reading of the subobject 
definition is that such an array should not cause the struct to be 
considered nonempty (it doesn't result in any int subobjects).

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH] Fix compiling large files

2016-03-15 Thread Jakub Jelinek
On Tue, Mar 15, 2016 at 03:31:44PM -0700, Richard Henderson wrote:
> On 03/10/2016 08:20 PM, DJ Delorie wrote:
> >I'm moving on to Plan C but I put a copy of the file on
> >.../dj/foo.c.gz (195Mb) if anyone wants to find out
> >why there's a 16Gb limit compiling it...
> 
> With just the following, we successfully compile your file.
> 
> It takes about 25 minutes and memory use tops out around 40GB.
> Which still seems insane for a 1.6GB input file consisting
> primarily of data for a static array, but that's a
> different problem.
> 
> At this point we usually have a PR to go with all stage4
> changes.  But a meaningful PR is difficult to create, since
> the attachment would be too large.  Perhaps a generator could
> be created, but since it wouldn't go in the testsuite it seems
> like a waste of time.
> 
> Thoughts?
> 
> 
> r~
> 
> 
>   * line-map.c (new_linemap): Make alloc_size a size_t.

Ok for stage4.

> diff --git a/libcpp/line-map.c b/libcpp/line-map.c
> index 1fb634a..80d4e6b 100644
> --- a/libcpp/line-map.c
> +++ b/libcpp/line-map.c
> @@ -376,7 +376,7 @@ new_linemap (struct line_maps *set,
>if (LINEMAPS_USED (set, macro_map_p) == LINEMAPS_ALLOCATED (set, 
> macro_map_p))
>  {
>/* We ran out of allocated line maps. Let's allocate more.  */
> -  unsigned alloc_size;
> +  size_t alloc_size;
> 
>/* Cast away extern "C" from the type of xrealloc.  */
>line_map_realloc reallocator = (set->reallocator

Jakub


[committed] Fix gfortran.dg/coarray_allocate_5.f08 on targets needing to link with libatomic

2016-03-15 Thread John David Anglin
This patch fixes failure of coarray_allocate_5.f08 on hppa.  Tested on 
hppa2.0w-hp-hpux11.11 and
hppa64-hp-hpux11.11.  Committed to trunk.

Dave
--
John David Anglin   dave.ang...@bell.net


2016-03-15  John David Anglin  

PR libfortran/69799
* gfortran.dg/coarray_allocate_5.f08: Add "-latomic" option if
libatomic_available.

Index: gfortran.dg/coarray_allocate_5.f08
===
--- gfortran.dg/coarray_allocate_5.f08  (revision 234163)
+++ gfortran.dg/coarray_allocate_5.f08  (working copy)
@@ -1,5 +1,6 @@
 ! { dg-do run }
 ! { dg-options "-fcoarray=lib -lcaf_single -fdump-tree-original" }
+! { dg-additional-options "-latomic" { target libatomic_available } }
 !
 ! Contributed by Ian Harvey  
 ! Extended by Andre Vehreschild  


[committed] Skip gcc.dg/ifcvt-4.c on hppa*64*-*-*

2016-03-15 Thread John David Anglin
See bug testsuite/68232 for justification.  Committed to trunk.

Dave
--
John David Anglin   dave.ang...@bell.net


2016-03-15  John David Anglin  

* gcc.dg/ifcvt-4.c: Add hppa*64*-*-* to skip list.

Index: gcc.dg/ifcvt-4.c
===
--- gcc.dg/ifcvt-4.c(revision 234239)
+++ gcc.dg/ifcvt-4.c(working copy)
@@ -1,6 +1,6 @@
 /* { dg-options "-fdump-rtl-ce1 -O2 --param max-rtl-if-conversion-insns=3" } */
 /* { dg-additional-options "-misel" { target { powerpc*-*-* } } } */
-/* { dg-skip-if "Multiple set if-conversion not guaranteed on all subtargets" { "arm*-*-* visium-*-*" } {"*"} { "" } }  */
+/* { dg-skip-if "Multiple set if-conversion not guaranteed on all subtargets" { "arm*-*-* hppa*64*-*-* visium-*-*" } {"*"} { "" } }  */
 
 int
 foo (int x, int y, int a)


Re: [PATCH] Fix compiling large files

2016-03-15 Thread DJ Delorie

> At this point we usually have a PR to go with all stage4
> changes.  But a meaningful PR is difficult to create, since
> the attachment would be too large.  Perhaps a generator could
> be created, but since it wouldn't go in the testsuite it seems
> like a waste of time.
> 
> Thoughts?

CPP macros grow exponentially, we could do it there, if we can get it
to preserve (inject?) line breaks.  But I wouldn't want to be the poor
developer on a 4Gb 32-bit system trying to run it...


[committed] Add -fno-common option on hppa*-*-hpux* for gcc.c-torture/execute/pr68532.c

2016-03-15 Thread John David Anglin
Another test where we need to avoid the limited alignment of common on 
hppa*-*-hpux*.
Committed to trunk.

Dave
--
John David Anglin   dave.ang...@bell.net


2016-03-15  John David Anglin  

* gcc.c-torture/execute/pr68532.c: Add -fno-common option on
hppa*-*-hpux*.

Index: gcc.c-torture/execute/pr68532.c
===
--- gcc.c-torture/execute/pr68532.c (revision 234163)
+++ gcc.c-torture/execute/pr68532.c (working copy)
@@ -1,4 +1,5 @@
 /* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+/* { dg-additional-options "-fno-common" { target hppa*-*-hpux* } } */
 
 #define SIZE 128
 unsigned short _Alignas (16) in[SIZE];


Re: PING^1: [PATCH] Add TYPE_EMPTY_RECORD for C++ empty class

2016-03-15 Thread H.J. Lu
On Tue, Mar 15, 2016 at 3:34 PM, Joseph Myers  wrote:
> On Tue, 15 Mar 2016, H.J. Lu wrote:
>
>> On Tue, Mar 15, 2016 at 2:39 PM, Joseph Myers  
>> wrote:
>> > I'm not sure if the zero-size arrays (a GNU extension) are considered to
>> > make a struct non-empty, but in any case I think the tests should cover
>> > such arrays as elements of structs.
>>
>> There are couple tests for structs with members of array
>> of empty types.  testsuite/g++.dg/abi/empty14.h has
>
> My concern is the other way round - structs with elements such as
> "int a[0];", an array [0] of a nonempty type.  My reading of the subobject
> definition is that such an array should not cause the struct to be
> considered nonempty (it doesn't result in any int subobjects).

This is a test for a struct with a zero-size array, which isn't treated
as an empty type.  C++ and C are compatible in how they pass it.

-- 
H.J.
From 549583547f8dfb284b6ae083031757371907671f Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Tue, 15 Mar 2016 17:20:08 -0700
Subject: [PATCH] Add a test for struct with zero-size array

---
 gcc/testsuite/g++.dg/abi/empty18.C  | 17 +
 gcc/testsuite/g++.dg/abi/empty18.h  |  9 +
 gcc/testsuite/g++.dg/abi/empty18a.c |  6 ++
 3 files changed, 32 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/abi/empty18.C
 create mode 100644 gcc/testsuite/g++.dg/abi/empty18.h
 create mode 100644 gcc/testsuite/g++.dg/abi/empty18a.c

diff --git a/gcc/testsuite/g++.dg/abi/empty18.C b/gcc/testsuite/g++.dg/abi/empty18.C
new file mode 100644
index 000..cf850ce
--- /dev/null
+++ b/gcc/testsuite/g++.dg/abi/empty18.C
@@ -0,0 +1,17 @@
+// PR c++/60336
+// { dg-do run }
+// { dg-options "-Wabi=9 -x c" }
+// { dg-additional-sources "empty18a.c" }
+// { dg-prune-output "command line option" }
+
+#include "empty18.h"
+extern "C" void fun(struct dummy, struct foo);
+
+int main()
+{
+  struct dummy d;
+  struct foo f = { -1, -2, -3, -4, -5 };
+
+  fun(d, f);
+  return 0;
+}
diff --git a/gcc/testsuite/g++.dg/abi/empty18.h b/gcc/testsuite/g++.dg/abi/empty18.h
new file mode 100644
index 000..86e7ecd
--- /dev/null
+++ b/gcc/testsuite/g++.dg/abi/empty18.h
@@ -0,0 +1,9 @@
+struct dummy { int d[0]; };
+struct foo
+{
+  int i1;
+  int i2;
+  int i3;
+  int i4;
+  int i5;
+};
diff --git a/gcc/testsuite/g++.dg/abi/empty18a.c b/gcc/testsuite/g++.dg/abi/empty18a.c
new file mode 100644
index 000..902860b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/abi/empty18a.c
@@ -0,0 +1,6 @@
+#include "empty18.h"
+void fun(struct dummy d, struct foo f)
+{
+  if (f.i1 != -1)
+__builtin_abort();
+}
-- 
2.5.0



Re: PING^1: [PATCH] Add TYPE_EMPTY_RECORD for C++ empty class

2016-03-15 Thread Joseph Myers
On Tue, 15 Mar 2016, H.J. Lu wrote:

> On Tue, Mar 15, 2016 at 3:34 PM, Joseph Myers  wrote:
> > On Tue, 15 Mar 2016, H.J. Lu wrote:
> >
> >> On Tue, Mar 15, 2016 at 2:39 PM, Joseph Myers  
> >> wrote:
> >> > I'm not sure if the zero-size arrays (a GNU extension) are considered to
> >> > make a struct non-empty, but in any case I think the tests should cover
> >> > such arrays as elements of structs.
> >>
> >> There are couple tests for structs with members of array
> >> of empty types.  testsuite/g++.dg/abi/empty14.h has
> >
> > My concern is the other way round - structs with elements such as
> > "int a[0];", an array [0] of a nonempty type.  My reading of the subobject
> > definition is that such an array should not cause the struct to be
> > considered nonempty (it doesn't result in any int subobjects).
> 
> This is a test for struct with zero-size array, which isn't treated
> as empty type.  C++ and C are compatible in its passing.

Where is the current definition of empty types you're proposing for use in 
GCC?  Is the behavior of this case clear from that definition?

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: PING^1: [PATCH] Add TYPE_EMPTY_RECORD for C++ empty class

2016-03-15 Thread H.J. Lu
On Tue, Mar 15, 2016 at 5:25 PM, Joseph Myers  wrote:
> On Tue, 15 Mar 2016, H.J. Lu wrote:
>
>> On Tue, Mar 15, 2016 at 3:34 PM, Joseph Myers  
>> wrote:
>> > On Tue, 15 Mar 2016, H.J. Lu wrote:
>> >
>> >> On Tue, Mar 15, 2016 at 2:39 PM, Joseph Myers  
>> >> wrote:
>> >> > I'm not sure if the zero-size arrays (a GNU extension) are considered to
>> >> > make a struct non-empty, but in any case I think the tests should cover
>> >> > such arrays as elements of structs.
>> >>
>> >> There are couple tests for structs with members of array
>> >> of empty types.  testsuite/g++.dg/abi/empty14.h has
>> >
>> > My concern is the other way round - structs with elements such as
>> > "int a[0];", an array [0] of a nonempty type.  My reading of the subobject
>> > definition is that such an array should not cause the struct to be
>> > considered nonempty (it doesn't result in any int subobjects).
>>
>> This is a test for struct with zero-size array, which isn't treated
>> as empty type.  C++ and C are compatible in its passing.
>
> Where is the current definition of empty types you're proposing for use in
> GCC?  Is the behavior of this case clear from that definition?

https://gcc.gnu.org/ml/gcc/2016-03/msg00071.html

Jason's patch follows it.  Here is a test for a struct with a zero-size
array of an empty type, which is treated as an empty type.


-- 
H.J.
From 222c8fcf6518b8689ead18516ce49ba71a1c0a49 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Tue, 15 Mar 2016 19:14:30 -0700
Subject: [PATCH] Add a test for struct with zero-size array of empty type

---
 gcc/testsuite/g++.dg/abi/empty19.C  | 17 +
 gcc/testsuite/g++.dg/abi/empty19.h  | 10 ++
 gcc/testsuite/g++.dg/abi/empty19a.c |  6 ++
 3 files changed, 33 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/abi/empty19.C
 create mode 100644 gcc/testsuite/g++.dg/abi/empty19.h
 create mode 100644 gcc/testsuite/g++.dg/abi/empty19a.c

diff --git a/gcc/testsuite/g++.dg/abi/empty19.C b/gcc/testsuite/g++.dg/abi/empty19.C
new file mode 100644
index 000..489eb3a
--- /dev/null
+++ b/gcc/testsuite/g++.dg/abi/empty19.C
@@ -0,0 +1,17 @@
+// PR c++/60336
+// { dg-do run }
+// { dg-options "-Wabi=9 -x c" }
+// { dg-additional-sources "empty14a.c" }
+// { dg-prune-output "command line option" }
+
+#include "empty19.h"
+extern "C" void fun(struct dummy, struct foo);
+
+int main()
+{
+  struct dummy d;
+  struct foo f = { -1, -2, -3, -4, -5 };
+
+  fun(d, f); // { dg-warning "empty" }
+  return 0;
+}
diff --git a/gcc/testsuite/g++.dg/abi/empty19.h b/gcc/testsuite/g++.dg/abi/empty19.h
new file mode 100644
index 000..616b87b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/abi/empty19.h
@@ -0,0 +1,10 @@
+struct dummy0 { };
+struct dummy { struct dummy0 d[0]; };
+struct foo
+{
+  int i1;
+  int i2;
+  int i3;
+  int i4;
+  int i5;
+};
diff --git a/gcc/testsuite/g++.dg/abi/empty19a.c b/gcc/testsuite/g++.dg/abi/empty19a.c
new file mode 100644
index 000..767b1eb
--- /dev/null
+++ b/gcc/testsuite/g++.dg/abi/empty19a.c
@@ -0,0 +1,6 @@
+#include "empty19.h"
+void fun(struct dummy d, struct foo f)
+{
+  if (f.i1 != -1)
+__builtin_abort();
+}
-- 
2.5.0



Re: PING^1: [PATCH] Add TYPE_EMPTY_RECORD for C++ empty class

2016-03-15 Thread Jason Merrill

On 03/15/2016 08:25 PM, Joseph Myers wrote:

> On Tue, 15 Mar 2016, H.J. Lu wrote:
>
>> On Tue, Mar 15, 2016 at 3:34 PM, Joseph Myers  wrote:
>>> On Tue, 15 Mar 2016, H.J. Lu wrote:
>>>
>>>> On Tue, Mar 15, 2016 at 2:39 PM, Joseph Myers  wrote:
>>>>> I'm not sure if the zero-size arrays (a GNU extension) are considered to
>>>>> make a struct non-empty, but in any case I think the tests should cover
>>>>> such arrays as elements of structs.
>>>>
>>>> There are couple tests for structs with members of array
>>>> of empty types.  testsuite/g++.dg/abi/empty14.h has
>>>
>>> My concern is the other way round - structs with elements such as
>>> "int a[0];", an array [0] of a nonempty type.  My reading of the subobject
>>> definition is that such an array should not cause the struct to be
>>> considered nonempty (it doesn't result in any int subobjects).
>>
>> This is a test for struct with zero-size array, which isn't treated
>> as empty type.  C++ and C are compatible in its passing.
>
> Where is the current definition of empty types you're proposing for use in
> GCC?  Is the behavior of this case clear from that definition?


"An empty type is a type where it and all of its subobjects 
(recursively) are of structure, union, or array type.  No memory slot 
nor register should be used to pass or return an object of empty type."


It seems to me that such a struct should be considered an empty type 
under this definition, since a zero-length array has no subobjects.


Jason



Re: [C++ PATCH] Fix -fsanitize=vptr (PR c++/70147)

2016-03-15 Thread Jason Merrill

On 03/15/2016 12:24 PM, Jakub Jelinek wrote:

> Bernd E. mentioned in the PR the problem that if some subobject ctor throws,
> if for -fsanitize=vptr we clear again the vtable pointers even for virtual
> bases then they won't be properly destructed.
>
> So, here is an incremental patch to the earlier patch, which will clear
> the virtual base vtbl pointers only in the in-charge ctor.

Right. If we aren't in charge of constructing the base, we shouldn't 
mess with its vptr either.  Both patches are OK.

> Though, this brings a non-sanitizer issue, for -flifetime-dse=2
> we emit a clobber of the whole subobject even in a ctor with _vtt_parm
> argument, and the virtual bases at that point might live inside of the
> area that is clobbered by the ctor {CLOBBER}
> (both data and vtable pointers).


See my comment in the PR.

Jason



Re: [PATCH] extend.texi: Expand on the perils of using the 'leaf' attribute.

2016-03-15 Thread Carlos O'Donell
On 03/14/2016 06:15 PM, Sandra Loosemore wrote:
> On 03/14/2016 12:40 PM, Carlos O'Donell wrote:
>> Using the 'leaf' attribute is difficult in certain use cases, and
>> the documentation rightly points out that signals is one such
>> problem.
>> 
>> We should additionally document the following caveats:
>> 
>> * Indirect function resolvers (thanks to Florian Weimer for
>> catching this). * Indirect function implementations * ELF symbol
>> interposition.
>> 
>> [snip]
>> 
>> gcc/ 2016-03-14  Carlos O'Donell  
>> 
>> * doc/extend.texi (Common Function Attributes): Describe ifunc
>> impact on leaf attribute.
>> 
> 
> Hmmm.  Both your patch and the original text really need some
> copy-editing to fix noun/verb agreement, punctuation, etc.  How about
> something like the attached patch?  I just threw this together and
> haven't tested this in any way, but if you confirm that it builds and it
> looks OK to you, feel free to check it in.

PDF looks good.

Committed as r234247.

2016-03-16  Carlos O'Donell  
Sandra Loosemore  

* doc/extend.texi (Common Function Attributes): Describe ifunc impact
on leaf attribute. Mention ELF interposition problems.

Index: gcc/doc/extend.texi
===
--- gcc/doc/extend.texi (revision 234236)
+++ gcc/doc/extend.texi (revision 234247)
@@ -2772,30 +2772,41 @@
 
 @item leaf
 @cindex @code{leaf} function attribute
-Calls to external functions with this attribute must return to the current
-compilation unit only by return or by exception handling.  In particular, leaf
-functions are not allowed to call callback function passed to it from the current
-compilation unit or directly call functions exported by the unit or longjmp
-into the unit.  Leaf function might still call functions from other compilation
-units and thus they are not necessarily leaf in the sense that they contain no
-function calls at all.
+Calls to external functions with this attribute must return to the
+current compilation unit only by return or by exception handling.  In
+particular, a leaf function is not allowed to invoke callback functions
+passed to it from the current compilation unit, directly call functions
+exported by the unit, or @code{longjmp} into the unit.  Leaf functions
+might still call functions from other compilation units and thus they
+are not necessarily leaf in the sense that they contain no function
+calls at all.
 
-The attribute is intended for library functions to improve dataflow analysis.
-The compiler takes the hint that any data not escaping the current compilation unit can
-not be used or modified by the leaf function.  For example, the @code{sin} function
-is a leaf function, but @code{qsort} is not.
-is a leaf function, but @code{qsort} is not.
+The attribute is intended for library functions to improve dataflow
+analysis.  The compiler takes the hint that any data not escaping the
+current compilation unit cannot be used or modified by the leaf
+function.  For example, the @code{sin} function is a leaf function, but
+@code{qsort} is not.
 
-Note that leaf functions might invoke signals and signal handlers might be
-defined in the current compilation unit and use static variables.  The only
-compliant way to write such a signal handler is to declare such variables
-@code{volatile}.
+Note that leaf functions might indirectly run a signal handler defined
+in the current compilation unit that uses static variables.  Similarly,
+when lazy symbol resolution is in effect, leaf functions might invoke
+indirect functions whose resolver function or implementation function is
+defined in the current compilation unit and uses static variables.  There
+is no standard-compliant way to write such a signal handler, resolver
+function, or implementation function, and the best that you can do is to
+remove the @code{leaf} attribute or mark all such static variables
+@code{volatile}.  Lastly, for ELF-based systems that support symbol
+interposition, care should be taken that functions defined in the
+current compilation unit do not unexpectedly interpose other symbols
+based on the defined standards mode and defined feature test macros;
+otherwise an inadvertent callback would be added.
 
-The attribute has no effect on functions defined within the current compilation
-unit.  This is to allow easy merging of multiple compilation units into one,
-for example, by using the link-time optimization.  For this reason the
-attribute is not allowed on types to annotate indirect calls.
+The attribute has no effect on functions defined within the current
+compilation unit.  This is to allow easy merging of multiple compilation
+units into one, for example, by using the link-time optimization.  For
+this reason the attribute is not allowed on types to annotate indirect
+calls.
 
-
 @item malloc
 @cindex @code{malloc} function attribute
 @cindex functions that behave like malloc
-- 
Cheers,
Carlos.