[PATCH] Fix PR71984 testcase

2016-08-04 Thread Richard Biener

Committed.

Richard.

2016-08-04  Richard Biener  

PR middle-end/71984
* gcc.dg/torture/pr71984.c: Guard correctness check for
little-endian.

Index: gcc/testsuite/gcc.dg/torture/pr71984.c
===
--- gcc/testsuite/gcc.dg/torture/pr71984.c  (revision 239113)
+++ gcc/testsuite/gcc.dg/torture/pr71984.c  (working copy)
@@ -15,7 +15,9 @@ int
 main ()
 {
   u8 x = foo((v64u64){0x0706050403020100UL});
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
   if (x != 5)
 __builtin_abort ();
+#endif
   return 0;
 }


Re: Fix for PR71696 in Libiberty Demangler (6)

2016-08-04 Thread Marcel Böhme
Hi Jeff,

> Can you take care of the minor issues above, retest & repost?

Sure. I removed the whitespace nits, used XDUPVEC instead of XNEWVEC+memcpy, 
and adjusted the growing heuristics of the new array proctypevec. The revised 
patch is attached below. Bootstrapped and regression tested on 
x86_64-pc-linux-gnu and checked PR71696 is resolved.

>> +   for (i = 0; i < work -> nproctypes; i++)
>> + if (work -> proctypevec [i] == n)
>> +   success = 0;
> So presumably this doesn't happen all that often or this could get expensive
> and we'd want something more efficient for searching, right?

It seems that, at least for the cases in the Demangler test suite, the loop
never executes more than one iteration.

Index: libiberty/ChangeLog
===
--- libiberty/ChangeLog (revision 239112)
+++ libiberty/ChangeLog (working copy)
@@ -1,3 +1,20 @@
+2016-08-04  Marcel Böhme  
+
+   PR c++/71696
+   * cplus-dem.c: Prevent infinite recursion when there is a cycle
+   in the referencing of remembered mangled types.
+   (work_stuff): New stack to keep track of the remembered mangled
+   types that are currently being processed.
+   (push_processed_type): New method to push currently processed
+   remembered type onto the stack.
+   (pop_processed_type): New method to pop currently processed
+   remembered type from the stack.
+   (work_stuff_copy_to_from): Copy values of new variables.
+   (delete_non_B_K_work_stuff): Free stack memory.
+   (demangle_args): Push/Pop currently processed remembered type.
+   (do_type): Do not demangle a cyclic reference and push/pop
+   referenced remembered type.
+
 2016-07-29  Aldy Hernandez  
 
* make-relative-prefix.c (make_relative_prefix_1): Fall back to
@@ -16,7 +33,7 @@
(d_template_args_1): Split out from d_template_args.
(d_args_length): New.
 
-2016-07-13  Marcel BÃhme  
+2016-07-13  Marcel Böhme  
 
PR c++/70926
* cplus-dem.c: Handle large values and overflow when demangling
Index: libiberty/cplus-dem.c
===
--- libiberty/cplus-dem.c   (revision 239112)
+++ libiberty/cplus-dem.c   (working copy)
@@ -144,6 +144,9 @@ struct work_stuff
   string* previous_argument; /* The last function argument demangled.  */
   int nrepeats; /* The number of times to repeat the previous
   argument.  */
+  int *proctypevec; /* Indices of currently processed remembered typevecs. 
 */
+  int proctypevec_size;
+  int nproctypes;
 };
 
 #define PRINT_ANSI_QUALIFIERS (work -> options & DMGL_ANSI)
@@ -436,6 +439,10 @@ iterate_demangle_function (struct work_stuff *,
 
 static void remember_type (struct work_stuff *, const char *, int);
 
+static void push_processed_type (struct work_stuff *, int);
+
+static void pop_processed_type (struct work_stuff *);
+
 static void remember_Btype (struct work_stuff *, const char *, int, int);
 
 static int register_Btype (struct work_stuff *);
@@ -1302,6 +1309,10 @@ work_stuff_copy_to_from (struct work_stuff *to, st
   memcpy (to->btypevec[i], from->btypevec[i], len);
 }
 
+  if (from->proctypevec)
+to->proctypevec =
+  XDUPVEC (int, from->proctypevec, from->proctypevec_size);
+
   if (from->ntmpl_args)
 to->tmpl_argvec = XNEWVEC (char *, from->ntmpl_args);
 
@@ -1330,12 +1341,18 @@ delete_non_B_K_work_stuff (struct work_stuff *work
   /* Discard the remembered types, if any.  */
 
   forget_types (work);
-  if (work -> typevec != NULL)
+  if (work->typevec != NULL)
 {
-  free ((char *) work -> typevec);
-  work -> typevec = NULL;
-  work -> typevec_size = 0;
+  free ((char *) work->typevec);
+  work->typevec = NULL;
+  work->typevec_size = 0;
 }
+  if (work->proctypevec != NULL)
+{
+  free (work->proctypevec);
+  work->proctypevec = NULL;
+  work->proctypevec_size = 0;
+}
   if (work->tmpl_argvec)
 {
   int i;
@@ -3555,6 +3572,8 @@ static int
 do_type (struct work_stuff *work, const char **mangled, string *result)
 {
   int n;
+  int i;
+  int is_proctypevec;
   int done;
   int success;
   string decl;
@@ -3567,6 +3586,7 @@ do_type (struct work_stuff *work, const char **man
 
   done = 0;
   success = 1;
+  is_proctypevec = 0;
   while (success && !done)
 {
   int member;
@@ -3627,8 +3647,15 @@ do_type (struct work_stuff *work, const char **man
  success = 0;
}
  else
-   {
- remembered_type = work -> typevec[n];
+   for (i = 0; i < work->nproctypes; i++)
+ if (work -> proctypevec [i] == n)
+   success = 0;
+
+ if (success)
+   {
+ is_proctypevec = 1;
+ push_processed_type (work, n);
+ remembered_type = work->typevec[n];
  mangled = &remember

Re: fix fallout of pr22051-2.c on arm

2016-08-04 Thread Richard Biener
On Thu, 4 Aug 2016, Prathamesh Kulkarni wrote:

> Hi,
> The attached patch fixes pr22051-2.c which regressed due to
> r238754. Matthew, could you please confirm if this patch fixes the
> test-case for you ?
> 
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
> Cross tested on arm*-*-*.
> OK for trunk ?

Note that if function pointer types are really the issue then
you also need to handle METHOD_TYPE, thus sth like

  && ! FUNC_OR_METHOD_TYPE_P (TREE_TYPE (TREE_TYPE (@0)))

please also add a comment for this non-obvious thing.  I believe
we should simply apply function pointer canonicalization for
comparisons early during gimplification - exposing this target
detail only during RTL expansion makes generic optimization hard.

Ok with those changes.

Thanks,
Richard.


Re: [PR70920] transform (intptr_t) x eq/ne CST to x eq/ne (typeof x) cst

2016-08-04 Thread Richard Biener
On Thu, 4 Aug 2016, Prathamesh Kulkarni wrote:

> On 3 August 2016 at 17:27, Matthew Wahab  wrote:
> > On 29/07/16 15:32, Prathamesh Kulkarni wrote:
> >>
> >> On 29 July 2016 at 12:42, Richard Biener  wrote:
> >>>
> >>> On Fri, 29 Jul 2016, Prathamesh Kulkarni wrote:
> >>>
>  On 28 July 2016 at 19:18, Richard Biener  wrote:
> >
> > On Thu, 28 Jul 2016, Prathamesh Kulkarni wrote:
> >
> >> On 28 July 2016 at 15:58, Andreas Schwab  wrote:
> >>>
> >>> On Mo, Jul 25 2016, Prathamesh Kulkarni
> >>>  wrote:
> >>>
>  diff --git a/gcc/testsuite/gcc.dg/pr70920-4.c
>  b/gcc/testsuite/gcc.dg/pr70920-4.c
>  new file mode 100644
>  index 000..dedb895
>  --- /dev/null
>  +++ b/gcc/testsuite/gcc.dg/pr70920-4.c
>  @@ -0,0 +1,21 @@
>  +/* { dg-do compile } */
>  +/* { dg-options "-O2 -fdump-tree-ccp-details
>  -Wno-int-to-pointer-cast" } */
>  +
>  +#include 
>  +
>  +void f1();
>  +void f2();
>  +
>  +void
>  +foo (int a)
>  +{
>  +  void *cst = 0;
>  +  if ((int *) a == cst)
>  +{
>  +  f1 ();
>  +  if (a)
>  + f2 ();
>  +}
>  +}
>  +
>  +/* { dg-final { scan-tree-dump "gimple_simplified to if
>  \\(_\[0-9\]* == 0\\)" "ccp1" } } */
> >>>
> >>>
> >>> This fails on all ilp32 platforms.
> >
> > [..]
> >>>
> >>>
> >>> I don't think just matching == 0 is a good idea.  I suggest to
> >>> restrict the testcase to lp64 targets and maybe add a ilp32 variant.
> >>
> >> Hi,
> >> I restricted the test-case to lp64 targets.
> >> Is this OK to commit ?
> >
> >
> > Hello,
> >
> > The test case is failing for arm-none-linux-gnueabihf.
> Oops, sorry about that.
> >
> > It is correctly skipped if the 'dg-require-effective-target lp64' you added
> > is moved to the end of the directives (after the dg-options).
> Indeed, it is skipped after moving to end.
> Is it OK to commit the attached patch ?

I believe the canonical place is after dg-do but before dg-options.
Can you check if that works, too?

Richard.

> Thanks,
> Prathamesh
> >
> > Matthew
> >
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


Re: [PR70920] transform (intptr_t) x eq/ne CST to x eq/ne (typeof x) cst

2016-08-04 Thread Prathamesh Kulkarni
On 4 August 2016 at 12:39, Richard Biener  wrote:
> On Thu, 4 Aug 2016, Prathamesh Kulkarni wrote:
>
>> On 3 August 2016 at 17:27, Matthew Wahab  wrote:
>> > On 29/07/16 15:32, Prathamesh Kulkarni wrote:
>> >>
>> >> On 29 July 2016 at 12:42, Richard Biener  wrote:
>> >>>
>> >>> On Fri, 29 Jul 2016, Prathamesh Kulkarni wrote:
>> >>>
>>  On 28 July 2016 at 19:18, Richard Biener  wrote:
>> >
>> > On Thu, 28 Jul 2016, Prathamesh Kulkarni wrote:
>> >
>> >> On 28 July 2016 at 15:58, Andreas Schwab  wrote:
>> >>>
>> >>> On Mo, Jul 25 2016, Prathamesh Kulkarni
>> >>>  wrote:
>> >>>
>>  diff --git a/gcc/testsuite/gcc.dg/pr70920-4.c
>>  b/gcc/testsuite/gcc.dg/pr70920-4.c
>>  new file mode 100644
>>  index 000..dedb895
>>  --- /dev/null
>>  +++ b/gcc/testsuite/gcc.dg/pr70920-4.c
>>  @@ -0,0 +1,21 @@
>>  +/* { dg-do compile } */
>>  +/* { dg-options "-O2 -fdump-tree-ccp-details
>>  -Wno-int-to-pointer-cast" } */
>>  +
>>  +#include 
>>  +
>>  +void f1();
>>  +void f2();
>>  +
>>  +void
>>  +foo (int a)
>>  +{
>>  +  void *cst = 0;
>>  +  if ((int *) a == cst)
>>  +{
>>  +  f1 ();
>>  +  if (a)
>>  + f2 ();
>>  +}
>>  +}
>>  +
>>  +/* { dg-final { scan-tree-dump "gimple_simplified to if
>>  \\(_\[0-9\]* == 0\\)" "ccp1" } } */
>> >>>
>> >>>
>> >>> This fails on all ilp32 platforms.
>> >
>> > [..]
>> >>>
>> >>>
>> >>> I don't think just matching == 0 is a good idea.  I suggest to
>> >>> restrict the testcase to lp64 targets and maybe add a ilp32 variant.
>> >>
>> >> Hi,
>> >> I restricted the test-case to lp64 targets.
>> >> Is this OK to commit ?
>> >
>> >
>> > Hello,
>> >
>> > The test case is failing for arm-none-linux-gnueabihf.
>> Oops, sorry about that.
>> >
>> > It is correctly skipped if the 'dg-require-effective-target lp64' you added
>> > is moved to the end of the directives (after the dg-options).
>> Indeed, it is skipped after moving to end.
>> Is it OK to commit the attached patch ?
>
> I believe the canonical place is after dg-do but before dg-options.
> Can you check if that works, too?
Yes that works. Should I commit the attached patch ?

Thanks,
Prathamesh
>
> Richard.
>
>> Thanks,
>> Prathamesh
>> >
>> > Matthew
>> >
>>
>
> --
> Richard Biener 
> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
> 21284 (AG Nuernberg)
diff --git a/gcc/testsuite/gcc.dg/pr70920-4.c b/gcc/testsuite/gcc.dg/pr70920-4.c
index ab2748b..e9c2b95 100644
--- a/gcc/testsuite/gcc.dg/pr70920-4.c
+++ b/gcc/testsuite/gcc.dg/pr70920-4.c
@@ -1,5 +1,5 @@
-/* { dg-require-effective-target lp64 } */
 /* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
 /* { dg-options "-O2 -fdump-tree-forwprop-details -Wno-int-to-pointer-cast" } 
*/
 
 #include 


Re: [LTO] Add wide_int streaming support

2016-08-04 Thread Richard Biener
On Thu, Aug 4, 2016 at 6:12 AM, kugan  wrote:
> Hi,
>
> During IPA-VRP implementation, I realized that we don't support streaming
> wide_int in LTO. Attached patch does this. Tested with IPA-VRP. Is this OK
> for trunk if bootstrap and regression testing is fine.

Hmm, those functions belong to data-streamer-{in,out}.c and data-streamer.h
and should be named streamer_write_wide_int / streamer_read_wide_int.

Note that we already have (non-exported) streamer_write_wi / streamer_read_wi
which operate on widest_ints.  Those also reside in lto-streamer-{in,out}.c and
should be moved to data-streamer.h (and be renamed to
streamer_write_widest_int).

There is no need to add additional hooks.

Can you do this please?

Thanks,
Richard.

> Thanks,
> Kugan
>
> gcc/ChangeLog:
>
> 2016-08-04  Kugan Vivekanandarajah  
>
> * lto-streamer-in.c (lto_input_wide_int): New.
> * lto-streamer-out.c (lto_output_wide_int): Likewise.
> * lto-streamer.c (lto_streamer_hooks_init): Init write_wide_int and
> read_wide_int.
> * lto-streamer.h: Declare lto_input_wide_int and
> lto_output_wide_int.
> * streamer-hooks.h (struct streamer_hooks): Add write_wide_int and
> read_wide_int.
> (stream_write_wide_int): New macro.
> (stream_read_wide_int): Likewise.
>


Re: [PATCH 6/9] sel-sched: Don't mess with register restores

2016-08-04 Thread Andrey Belevantsev
Hello,

On 01.08.2016 4:42, Segher Boessenkool wrote:
> If selective scheduling copies register restores it confuses dwarf2cfi.
> 
> 2016-06-07  Segher Boessenkool  
> 
>   * sel-sched-ir.c (init_global_and_expr_for_insn): Don't copy
>   instructions with a REG_CFA_RESTORE note.

OK from sel-sched POV.

Best,
Andrey

> ---
>  gcc/sel-sched-ir.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/gcc/sel-sched-ir.c b/gcc/sel-sched-ir.c
> index 83f813a..4a3984a 100644
> --- a/gcc/sel-sched-ir.c
> +++ b/gcc/sel-sched-ir.c
> @@ -3015,6 +3015,7 @@ init_global_and_expr_for_insn (insn_t insn)
>/* TRAP_IF though have an INSN code is control_flow_insn_p ().  */
>|| control_flow_insn_p (insn)
>|| volatile_insn_p (PATTERN (insn))
> +   || find_reg_note (insn, REG_CFA_RESTORE, NULL)
>|| (targetm.cannot_copy_insn_p
>&& targetm.cannot_copy_insn_p (insn)))
>  force_unique_p = true;
> 



Re: [RFC] ipa bitwise constant propagation

2016-08-04 Thread Richard Biener
On Thu, 4 Aug 2016, Prathamesh Kulkarni wrote:

> Hi,
> This is a prototype patch for propagating known/unknown bits
> inter-procedurally for integral types; it propagates info obtained from
> get_nonzero_bits ().
> 
> Patch required making following changes:
> a) To make info from get_nonzero_bits() available to ipa, I had to remove
> guard !nonzero_p in ccp_finalize. However that triggered the following ICE
> in get_ptr_info() for default_none.f95 (and several other fortran tests)
> with options: -fopenacc -O2
> ICE: http://pastebin.com/KjD7HMQi
> I confirmed with Richard that this was a latent issue.

Can you please bootstrap/test the fix for this separately?  (doesn't
seem to be included in this patch btw)

> b) I chose widest_int for representing value, mask in ipcp_bits_lattice
> and correspondingly changed declarations for
> bit_value_unop_1/bit_value_binop_1 to take
> precision and sign instead of type (those are the only two fields that
> were used). Both these functions are exported by tree-ssa-ccp.h
> I hope that's ok ?

That's ok, but please change the functions to overloads of
bit_value_binop / bit_value_unop to not export ugly _1 names.

-  signop sgn = TYPE_SIGN (type);
-  int width = TYPE_PRECISION (type);
+  signop sgn = type_sgn;
+  int width = (int) type_precision;

please adjust parameter names to get rid of those now unnecessary
locals (and make the precision parameter an 'int').

> c) Changed streamer_read_wi/streamer_write_wi to non-static.
> Ah I see Kugan has submitted a patch for this, so I will drop this hunk.

But he streams wide_int, not widest_int.  I followed up on his
patch.

> d) We have following in tree-ssa-ccp.c:get_default_value ():
>   if (flag_tree_bit_ccp)
> {
>   wide_int nonzero_bits = get_nonzero_bits (var);
>   if (nonzero_bits != -1)
> {
>   val.lattice_val = CONSTANT;
>   val.value = build_zero_cst (TREE_TYPE (var));
>   val.mask = extend_mask (nonzero_bits);
> }
> 
> extend_mask() sets all upper bits to 1 in nonzero_bits, ie, varying
> in terms of bit-ccp.
> I suppose in tree-ccp we need to extend mask if var is parameter since we 
> don't
> know in advance what values it will receive from different callers and mark 
> all
> upper bits as 1 to be safe.

Not sure, it seems to me that we can zero-extend for unsigned types
and sign-extend for signed types (if the "sign"-bit of nonzero_bits
is one it properly makes higher bits undefined).  Can you change
the code accordingly?  (simply give extend_mask a sign-op and use
that appropriately?)  Please split out this change so it can be
tested separately.

> However I suppose with ipa, we can determine exactly which bits of
> parameter are constant and
> setting all upper bits to 1 will become unnecessary ?
> 
> For example, consider following artificial test-case:
> int f(int x)
> {
>   if (x > 300)
> return 1;
>   else
> return 2;
> }
> 
> int main(int argc, char **argv)
> {
>   return f(argc & 0xc) + f (argc & 0x3);
> }
> 
> For x, the mask would be meet of:
> <0, 0xc> meet <0, 0x3> == (0x3 | 0xc) | (0 ^ 0) == 0xf
> and ipcp_update_bits() sets nonzero_bits for x to 0xf.
> However get_default_value then calls extend_mask (0xf), resulting in
> all upper bits
> being set to 1 and consequently the condition if (x > 300) doesn't get folded.

But then why would the code trying to optimize the comparison look at
bits that are outside of the precision?  (Where do we try to use this
info?  I see that VRP fails to use nonzero bits if no range info
is present - I suppose set_nonzero_bits fails to eventually adjust
the range.)

That said, where is the folding code and why does it care for those
"uninteresting" bits at all?

> To resolve this, I added a new flag "set_by_ipa" to decl_common,
> which is set to true if the mask of parameter is determined by ipa-cp,
> and the condition changes to:
> 
> if (SSA_NAME_VAR (var)
> && TREE_CODE (SSA_NAME_VAR (var)) == PARM_DECL
> && DECL_SET_BY_IPA (SSA_NAME_VAR (var))
>   val.mask = widest_int::from (nonzero_bits,
>   TYPE_SIGN (TREE_TYPE (SSA_NAME_VAR (var)));
> else
>   val.mask = extend_mask (nonzero_bits);
> 
> I am not sure if adding a new flag to decl_common is a good idea. How
> do other ipa passes deal with this/similar issue ?
> 
> I suppose we would want to gate this on some flag, say -fipa-bit-cp ?
> I haven't yet gated it on the flag, will do in next version of patch.
> I have added some very simple test-cases, I will try to add more
> meaningful ones.

See above - we should avoid needing this.

> Patch passes bootstrap+test on x86_64-unknown-linux-gnu
> and cross-tested on arm*-*-* and aarch64*-*-* with the exception
> of some fortran tests failing due to above ICE.
> 
> As next steps, I am planning to extend it to handle alignment propagation,
> and do further testing (lto-bootstrap, chromium).
> I would be 

Re: [PR70920] transform (intptr_t) x eq/ne CST to x eq/ne (typeof x) cst

2016-08-04 Thread Richard Biener
On Thu, 4 Aug 2016, Prathamesh Kulkarni wrote:

> On 4 August 2016 at 12:39, Richard Biener  wrote:
> > On Thu, 4 Aug 2016, Prathamesh Kulkarni wrote:
> >
> >> On 3 August 2016 at 17:27, Matthew Wahab  
> >> wrote:
> >> > On 29/07/16 15:32, Prathamesh Kulkarni wrote:
> >> >>
> >> >> On 29 July 2016 at 12:42, Richard Biener  wrote:
> >> >>>
> >> >>> On Fri, 29 Jul 2016, Prathamesh Kulkarni wrote:
> >> >>>
> >>  On 28 July 2016 at 19:18, Richard Biener  wrote:
> >> >
> >> > On Thu, 28 Jul 2016, Prathamesh Kulkarni wrote:
> >> >
> >> >> On 28 July 2016 at 15:58, Andreas Schwab  wrote:
> >> >>>
> >> >>> On Mo, Jul 25 2016, Prathamesh Kulkarni
> >> >>>  wrote:
> >> >>>
> >>  diff --git a/gcc/testsuite/gcc.dg/pr70920-4.c
> >>  b/gcc/testsuite/gcc.dg/pr70920-4.c
> >>  new file mode 100644
> >>  index 000..dedb895
> >>  --- /dev/null
> >>  +++ b/gcc/testsuite/gcc.dg/pr70920-4.c
> >>  @@ -0,0 +1,21 @@
> >>  +/* { dg-do compile } */
> >>  +/* { dg-options "-O2 -fdump-tree-ccp-details
> >>  -Wno-int-to-pointer-cast" } */
> >>  +
> >>  +#include 
> >>  +
> >>  +void f1();
> >>  +void f2();
> >>  +
> >>  +void
> >>  +foo (int a)
> >>  +{
> >>  +  void *cst = 0;
> >>  +  if ((int *) a == cst)
> >>  +{
> >>  +  f1 ();
> >>  +  if (a)
> >>  + f2 ();
> >>  +}
> >>  +}
> >>  +
> >>  +/* { dg-final { scan-tree-dump "gimple_simplified to if
> >>  \\(_\[0-9\]* == 0\\)" "ccp1" } } */
> >> >>>
> >> >>>
> >> >>> This fails on all ilp32 platforms.
> >> >
> >> > [..]
> >> >>>
> >> >>>
> >> >>> I don't think just matching == 0 is a good idea.  I suggest to
> >> >>> restrict the testcase to lp64 targets and maybe add a ilp32 variant.
> >> >>
> >> >> Hi,
> >> >> I restricted the test-case to lp64 targets.
> >> >> Is this OK to commit ?
> >> >
> >> >
> >> > Hello,
> >> >
> >> > The test case is failing for arm-none-linux-gnueabihf.
> >> Oops, sorry about that.
> >> >
> >> > It is correctly skipped if the 'dg-require-effective-target lp64' you 
> >> > added
> >> > is moved to the end of the directives (after the dg-options).
> >> Indeed, it is skipped after moving to end.
> >> Is it OK to commit the attached patch ?
> >
> > I believe the canonical place is after dg-do but before dg-options.
> > Can you check if that works, too?
> Yes that works. Should I commit the attached patch ?

Yes.

Richard.

> Thanks,
> Prathamesh
> >
> > Richard.
> >
> >> Thanks,
> >> Prathamesh
> >> >
> >> > Matthew
> >> >
> >>
> >
> > --
> > Richard Biener 
> > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
> > 21284 (AG Nuernberg)
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


Re: [LTO] Add wide_int streaming support

2016-08-04 Thread kugan

Hi Richard,

Thanks for the review.

On 04/08/16 17:26, Richard Biener wrote:

On Thu, Aug 4, 2016 at 6:12 AM, kugan  wrote:

Hi,

During IPA-VRP implementation, I realized that we don't support streaming
wide_int in LTO. Attached patch does this. Tested with IPA-VRP. Is this OK
for trunk if bootstrap and regression testing is fine.


Hmm, those functions belong to data-streamer-{in,out}.c and data-streamer.h
and should be named streamer_write_wide_int / streamer_read_wide_int.

Note that we already have (non-exported) streamer_write_wi / streamer_read_wi
which operate on widest_ints.  Those also reside in lto-streamer-{in,out}.c and
should be moved to data-streamer.h (and be renamed to
streamer_write_widest_int).


I now have streamer_write_wide_int and streamer_write_widest_int, and
similarly for reading. There is a lot of similarity between them, but I am
not very familiar with wide_int so I kept it that way. Is this OK now?


Thanks,
Kugan

gcc/ChangeLog:

2016-08-04  Kugan Vivekanandarajah  

* data-streamer-in.c (streamer_read_wide_int): New.
(streamer_read_widest_int): Renamed function.
* data-streamer-out.c (streamer_write_wide_int): New.
(streamer_write_widest_int): Renamed function.
* lto-streamer-in.c (streamer_read_wi): Renamed and moved to
data-streamer-in.c.
(input_cfg): Call renamed function.
* lto-streamer-out.c (streamer_write_wi): Renamed and moved to
data-streamer-out.c.
(output_cfg): Call renamed function.
* data-streamer.h: Add declarations.


There is no need to add additional hooks.

Can you do this please?

Thanks,
Richard.


Thanks,
Kugan

gcc/ChangeLog:

2016-08-04  Kugan Vivekanandarajah  

* lto-streamer-in.c (lto_input_wide_int): New.
* lto-streamer-out.c (lto_output_wide_int): Likewise.
* lto-streamer.c (lto_streamer_hooks_init): Init write_wide_int and
read_wide_int.
* lto-streamer.h: Declare lto_input_wide_int and
lto_output_wide_int.
* streamer-hooks.h (struct streamer_hooks): Add write_wide_int and
read_wide_int.
(stream_write_wide_int): New macro.
(stream_read_wide_int): Likewise.

>From fb2561bcdaf656c464b98dad28db96fcdf74af17 Mon Sep 17 00:00:00 2001
From: Kugan Vivekanandarajah 
Date: Thu, 4 Aug 2016 11:54:00 +1000
Subject: [PATCH 6/8] Add wide_int streaming support

---
 gcc/data-streamer-in.c  | 31 +++
 gcc/data-streamer-out.c | 27 +++
 gcc/data-streamer.h |  4 
 gcc/lto-streamer-in.c   | 21 +++--
 gcc/lto-streamer-out.c  | 20 +++-
 5 files changed, 68 insertions(+), 35 deletions(-)

diff --git a/gcc/data-streamer-in.c b/gcc/data-streamer-in.c
index 2625af6..8664a86 100644
--- a/gcc/data-streamer-in.c
+++ b/gcc/data-streamer-in.c
@@ -184,3 +184,34 @@ streamer_read_gcov_count (struct lto_input_block *ib)
   gcc_assert (ret >= 0);
   return ret;
 }
+
+/* Read the physical representation of a wide_int val from
+   input block IB.  */
+
+wide_int
+streamer_read_wide_int (struct lto_input_block *ib)
+{
+  HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
+  int i;
+  int prec = streamer_read_uhwi (ib);
+  int len = streamer_read_uhwi (ib);
+  for (i = 0; i < len; i++)
+a[i] = streamer_read_hwi (ib);
+  return wide_int_storage::from_array (a, len, prec);
+}
+
+/* Read the physical representation of a widest_int val from
+   input block IB.  */
+
+widest_int
+streamer_read_widest_int (struct lto_input_block *ib)
+{
+  HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
+  int i;
+  int prec ATTRIBUTE_UNUSED = streamer_read_uhwi (ib);
+  int len = streamer_read_uhwi (ib);
+  for (i = 0; i < len; i++)
+a[i] = streamer_read_hwi (ib);
+  return widest_int::from_array (a, len);
+}
+
diff --git a/gcc/data-streamer-out.c b/gcc/data-streamer-out.c
index e476530..3dd423b 100644
--- a/gcc/data-streamer-out.c
+++ b/gcc/data-streamer-out.c
@@ -375,3 +375,30 @@ streamer_write_data_stream (struct lto_output_stream *obs, const void *data,
 }
 }
 
+/* Emit the physical representation of wide_int VAL to output block OB.  */
+
+void
+streamer_write_wide_int (struct output_block *ob, const wide_int &val)
+{
+  int len = val.get_len ();
+
+  streamer_write_uhwi (ob, val.get_precision ());
+  streamer_write_uhwi (ob, len);
+  for (int i = 0; i < len; i++)
+streamer_write_hwi (ob, val.elt (i));
+}
+
+/* Emit the physical representation of widest_int W to output block OB.  */
+
+void
+streamer_write_widest_int (struct output_block *ob,
+			   const widest_int &w)
+{
+  int len = w.get_len ();
+
+  streamer_write_uhwi (ob, w.get_precision ());
+  streamer_write_uhwi (ob, len);
+  for (int i = 0; i < len; i++)
+streamer_write_hwi (ob, w.elt (i));
+}
+
diff --git a/gcc/data-streamer.h b/gcc/data-streamer.h
index 0048f66..ff479a6 100644
--- a/gcc/data-streamer.h
+++ b/gcc/data-streamer.h
@@ -69,6 +69,8 @@ void streamer_write_hwi_stream (struct lto_output_stream *, HOST_WIDE_INT);
 void str

Re: [PATCH] Teach VRP to truncate the case ranges of a switch

2016-08-04 Thread Richard Biener
On Thu, Aug 4, 2016 at 4:30 AM, Patrick Palka  wrote:
> On Wed, 3 Aug 2016, David Malcolm wrote:
>
>> On Wed, 2016-08-03 at 15:47 +0200, Richard Biener wrote:
>> > On Wed, Aug 3, 2016 at 6:00 AM, Patrick Palka 
>> > wrote:
>> > > VRP currently has functionality to eliminate case labels that lie
>> > > completely outside of the switch operand's value range.  This patch
>> > > complements this functionality by teaching VRP to also truncate the
>> > > case
>> > > label ranges that partially overlap with the operand's value range.
>> > >
>> > > Bootstrapped and regtested on x86_64-pc-linux-gnu.  Does this look
>> > > like
>> > > a reasonable optimization?  Admittedly, its effect will almost
>> > > always be
>> > > negligible except in cases where a case label range spans a large
>> > > number
>> > > of values which is a pretty rare thing.  The optimization triggered
>> > > about 250 times during bootstrap.
>> >
>> > I think it's most useful when the range collapses to a single value.
>> >
>> > Ok.
>>
>> Is this always an improvement?   I can see that it can simplify things,
>> eliminate dead code etc, but could it make evaluating the switch less
>> efficient?
>>
>> Consider e.g.
>>
>>  void
>>  test (char ch)
>>  {
>>if (ch > 17)
>>  return;
>>
>>switch (ch)
>>  {
>>  case 0:
>>foo (); break;
>>
>>  case 1 .. 255:
>>bar (); break;
>>  }
>>  }
>>
>> which (assuming this could survive this far in this form) previously
>> could be implemented as a simple "if (ch == 0)" but with this would get
>> simplified to:
>>
>>  void
>>  test (char ch)
>>  {
>>if (ch > 17)
>>  return;
>>
>>switch (ch)
>>  {
>>  case 0:
>>foo (); break;
>>
>>  case 1 .. 17:
>>bar (); break;
>>  }
>>  }
>>
>> which presumably introduces a compare against 17 in the implementation of 
>> the switch; does the new compare get optimized away by jump threading?
>
> In this particular example the final code does get worse with the patch
> for the reason you mentioned:
>
> Before:After:
> test:  test:
> .LFB0: .LFB0:
> .cfi_startproc .cfi_startproc
> cmpb$17, %dil  cmpb$17, %dil
> ja  .L1ja  .L1
> xorl%eax, %eax subl$1, %edi
> cmpb$1, %dil   xorl%eax, %eax
> jb  .L7cmpb$16, %dil
> jmp barja  .L7
> .p2align 4,,10 jmp bar
> .p2align 3 .p2align 4,,10
> .L7:   .p2align 3
> jmp foo.L7:
> .p2align 4,,10 jmp foo
> .p2align 3 .p2align 4,,10
> .L1:   .p2align 3
> rep ret.L1:
> .cfi_endproc   rep ret
>.cfi_endproc
>
> What's weird is that during gimplification the switch gets simplified to
>
>   switch (ch)
>   {
> default: foo (); break;
> case 1 ... 255: bar (); break;
>   }
>
> but if anything I would have expected it to get simplified to
>
>   switch (ch)
>   {
> case 0: foo (); break;
> default: bar (); break;
>   }
>
> In general, when case labels are exhaustive, maybe it would be better to
> designate the case label that has the widest range as the default label?
> (Currently preprocess_case_label_vec_for_gimple() just designates the
> very first label to be the default label.)  That would fix this
> particular regression at least.

Yes, that looks useful - though I wonder how easy it is to detect for the
cases where there are more than one case/default.

Richard.


Re: libgo patch committed: Update to 1.7rc3

2016-08-04 Thread Uros Bizjak
On Thu, Aug 4, 2016 at 12:53 AM, Ian Lance Taylor  wrote:
> On Thu, Jul 28, 2016 at 4:24 AM, Uros Bizjak  wrote:
>>
>> A new testsuite failure is introduced:
>>
>> FAIL: text/template
>>
>> on both, x86_64-linux-gnu and alpha-linux-gnu.
>>
>> The testcase corrupts stack with a too deep recursion.
>>
>> There is a part in libgo/go/text/template/exec.go that should handle
>> this situation:
>>
>> // maxExecDepth specifies the maximum stack depth of templates within
>> // templates. This limit is only practically reached by accidentally
>> // recursive template invocations. This limit allows us to return
>> // an error instead of triggering a stack overflow.
>> const maxExecDepth = 10
>>
>> but the limit is either set too high, or the error handling code is
>> inefficient on both, split-stack (x86_64) and non-split-stack (alpha)
>> targets. Lowering this value to 1 "fixes" the testcase on both
>> targets.
>
> I can not recreate this problem on x86 or x86_64.
>
> Does this patch work around the problem on Alpha?

Yes, the patch "fixes" the problem on alpha, but I still see the
failure on x86_64, even with the unlimited stack.

This is on Fedora 24, x86_64:

$ gmake -j 12 text/template/check
/home/uros/gcc-svn/trunk/libgo/testsuite/gotest: line 622:  4556
Segmentation fault  (core dumped) ./a.out -test.short
-test.timeout=${timeout}s "$@"
FAIL: text/template
Makefile:6285: recipe for target 'text/template/check' failed
gmake: *** [text/template/check] Error 1
$ ulimit -s
unlimited

Running the test under gdb:

(gdb) r
Starting program:
/ssd/uros/gcc-build/x86_64-pc-linux-gnu/libgo/gotest26048/test/a.out
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
[New Thread 0x2e93c700 (LWP 31972)]
[New Thread 0x2f740700 (LWP 31973)]
[New Thread 0x2f941700 (LWP 31974)]

Thread 1 "a.out" received signal SIGSEGV, Segmentation fault.
__generic_morestack (pframe_size=0x2ff04170,
old_stack=0x2ff04190, param_size=0) at
/home/uros/gcc-svn/trunk/libgcc/generic-morestack.c:573
573   current->old_stack = old_stack;
(gdb) c
Continuing.

Thread 1 "a.out" received signal SIGSEGV, Segmentation fault.
__generic_morestack (pframe_size=0x2ff03aa0,
old_stack=0x2ff03ac0, param_size=0) at
/home/uros/gcc-svn/trunk/libgcc/generic-morestack.c:573
573   current->old_stack = old_stack;
(gdb) c
Continuing.
[Thread 0x2f941700 (LWP 31974) exited]
[Thread 0x2f740700 (LWP 31973) exited]
[Thread 0x2e93c700 (LWP 31972) exited]

Program terminated with signal SIGSEGV, Segmentation fault.
The program no longer exists.
(gdb)


Manually running the testcase:

[uros@localhost test]$ LD_LIBRARY_PATH=../../.libs ./a.out
fatal error: unexpected signal during runtime execution
[signal 0xb code=0x1 addr=0x47]

runtime stack:
runtime_dopanic
/home/uros/gcc-svn/trunk/libgo/runtime/panic.c:135
runtime_throw
/home/uros/gcc-svn/trunk/libgo/runtime/panic.c:193
sig_panic_leadin
/home/uros/gcc-svn/trunk/libgo/runtime/go-signal.c:249
sig_panic_info_handler
/home/uros/gcc-svn/trunk/libgo/runtime/go-signal.c:283

:0
scanblock
/home/uros/gcc-svn/trunk/libgo/runtime/mgc0.c:1005
gc
/home/uros/gcc-svn/trunk/libgo/runtime/mgc0.c:2271
mgc
/home/uros/gcc-svn/trunk/libgo/runtime/mgc0.c:2215
runtime_mstart
/home/uros/gcc-svn/trunk/libgo/runtime/proc.c:1076

goroutine 392 [garbage collection]:
runtime_mcall
/home/uros/gcc-svn/trunk/libgo/runtime/proc.c:295
runtime_gc
/home/uros/gcc-svn/trunk/libgo/runtime/mgc0.c:2191
runtime_mallocgc
/home/uros/gcc-svn/trunk/libgo/runtime/malloc.goc:259
text_template.walkTemplate.pN19_text_template.state

/ssd/uros/gcc-build/x86_64-pc-linux-gnu/libgo/gotest26048/test/exec.go:382
text_template.walk.pN19_text_template.state

/ssd/uros/gcc-build/x86_64-pc-linux-gnu/libgo/gotest26048/test/exec.go:239
text_template.walk.pN19_text_template.state

/ssd/uros/gcc-build/x86_64-pc-linux-gnu/libgo/gotest26048/test/exec.go:234
text_template.walkTemplate.pN19_text_template.state

/ssd/uros/gcc-build/x86_64-pc-linux-gnu/libgo/gotest26048/test/exec.go:383
text_template.walk.pN19_text_template.state

/ssd/uros/gcc-build/x86_64-pc-linux-gnu/libgo/gotest26048/test/exec.go:239
text_template.walk.pN19_text_template.state

/ssd/uros/gcc-build/x86_64-pc-linux-gnu/libgo/gotest26048/test/exec.go:234
text_template.walkTemplate.pN19_text_template.state

/ssd/uros/gcc-build/x86_64-pc-linux-gnu/libgo/gotest26048/test/exec.go:383
text_template.walk.pN19_text_template.state

[...]


/ssd/uros/gcc-build/x86_64-pc-linux-gnu/libgo/gotest26048/test/exec.go:239
text_template.walk.pN19_text_template.state

/ssd/uros/gcc-build/x86_64-pc-linux-gnu/libgo/gotest26048/test/exec.go:234
text_template.walkTemplate.pN19_text_template.state

/ssd/uros/gcc-build/x86_64-pc-linux-gnu/libgo/gotest26048/test/exec.g

Re: [LTO] Add wide_int streaming support

2016-08-04 Thread Richard Biener
On Thu, Aug 4, 2016 at 10:09 AM, kugan
 wrote:
> Hi Richard,
>
> Thanks for the review.
>
> On 04/08/16 17:26, Richard Biener wrote:
>>
>> On Thu, Aug 4, 2016 at 6:12 AM, kugan 
>> wrote:
>>>
>>> Hi,
>>>
>>> During IPA-VRP implementation, I realized that we don't support streaming
>>> wide_int in LTO. Attached patch does this. Tested with IPA-VRP. Is this
>>> OK
>>> for trunk if bootstrap and regression testing is fine.
>>
>>
>> Hmm, those functions belong to data-streamer-{in,out}.c and
>> data-streamer.h
>> and should be named streamer_write_wide_int / streamer_read_wide_int.
>>
>> Note that we already have (non-exported) streamer_write_wi /
>> streamer_read_wi
>> which operate on widest_ints.  Those also reside in
>> lto-streamer-{in,out}.c and
>> should be moved to data-streamer.h (and be renamed to
>> streamer_write_widest_int).
>
>
> I have now streamer_write_wide_int and streamer_write_widest_int. Similarly
> for reading. There is a lot of similarity. I am not very familiar with
> wide_int so kept it that way. Is this OK now?

I also thought about merging both cases but I don't see how it is
easily possible
in a way that would reduce the number of source lines (you could split out an
inline worker, but that wouldn't save anything I think)

Yes.

Thanks,
Richard.

> Thanks,
> Kugan
>
> gcc/ChangeLog:
>
> 2016-08-04  Kugan Vivekanandarajah  
>
> * data-streamer-in.c (streamer_read_wide_int): New.
> (streamer_read_widest_int): Renamed function.
> * data-streamer-out.c (streamer_write_wide_int): New
> (streamer_write_widest_int): Renamed function.
> * lto-streamer-in.c (streamer_read_wi): Renamed and moved to
> data-stream-in.c.
> (input_cfg): Call renamed function.
> * lto-streamer-out.c (streamer_write_wi): Renamed and moved to
> data-stream-out.c.
> (output_cfg): Call renamed function.
> * data-streamer.h: Add declarations.
>
>>
>> There is no need to add additional hooks.
>>
>> Can you do this please?
>>
>> Thanks,
>> Richard.
>>
>>> Thanks,
>>> Kugan
>>>
>>> gcc/ChangeLog:
>>>
>>> 2016-08-04  Kugan Vivekanandarajah  
>>>
>>> * lto-streamer-in.c (lto_input_wide_int): New.
>>> * lto-streamer-out.c (lto_output_wide_int): Likewise.
>>> * lto-streamer.c (lto_streamer_hooks_init): Init write_wide_int
>>> and
>>> read_wide_int.
>>> * lto-streamer.h: Declare lto_input_wide_int and
>>> lto_output_wide_int.
>>> * streamer-hooks.h (struct streamer_hooks): Add write_wide_int
>>> and
>>> read_wide_int.
>>> (stream_write_wide_int): New macro.
>>> (stream_read_wide_int): Likewise.
>>>
>


Re: [PATCH][RFC] PR middle-end/22141 GIMPLE store widening pass

2016-08-04 Thread Richard Biener
On Wed, Aug 3, 2016 at 5:15 PM, Kyrill Tkachov
 wrote:
> Hi Richard,
>
> On 18/07/16 13:22, Richard Biener wrote:
>>
>> On Fri, Jul 15, 2016 at 5:13 PM, Kyrill Tkachov
>>  wrote:
>
>
> 
>
>
>> +  /* Record the original statements so that we can keep track of
>> +statements emitted in this pass and not re-process new
>> +statements.  */
>> +  for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next
>> (&gsi))
>> +   {
>> + gimple *stmt = gsi_stmt (gsi);
>> + if (!is_gimple_debug (stmt))
>> +   orig_stmts.add (stmt);
>> + num_statements++;
>> +   }
>>
>> please use gimple_set_visited () instead, that should be cheaper.
>>
>>
>> +  do
>> +   {
>> + changes_made = false;
>> + for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next
>> (&gsi))
>> +   {
>> ...
>> +   }
>> +  while (changes_made);
>>
>> looks pretty quadratic to me.  Instead of tracking things with
>> m_curr_base_expr
>> why not use a hash-map to track stores related to a base?
>>
>> +  /* Don't handle bitfields that are not a
>> multiple
>> + of BITS_PER_UNIT for now.  Can be extended
>> + later.  */
>> +  && (bitsize % BITS_PER_UNIT == 0)
>>
>> :(
>>
>> +  && !stmt_interferes_with_mem_accesses_p (lhs))
>>
>> given this function loops over all accesses this is quadratic as well.
>>
>> I think alias queries could be simplified if you reduce them to alias
>> checks on the base object (and allow overlapping constant stores
>> which should be easy to handle during merging).
>
>
> I've implemented that and it simplified the detecting code as well as its
> complexity
> but I'm now missing some cases that were being caught before.
> An example is:
> struct bar {
>   int a;
>   char b;
>   char c;
>   char d;
>   char e;
>   char g;
> };
>
> void
> foo1 (struct bar *p, char tmp)
> {
>   p->a = 0;
>   p->b = 0;
>   p->g = tmp;
>   p->c = 0;
>   p->d = 0;
>   p->e = 0;
> }
>
> The store to 'g' doesn't interfere with the contiguous stores to the early
> fields but because
> we perform the aliasing checks on the base object 'p' rather than the full
> LHS of the assignments
> this is deemed to alias the constant stores and only the first two and the
> last three constant stores
> are merged instead of the full 5 stores.
>
> I'll experiment with some solutions for this involving recording the
> non-constant stores and performing
> some trickery during the merging phase.

Not sure how/where exactly you perform the alias checks but alias
checks inbetween a group of same bases should use the full
reference to also factor in offsets/sizes.  Only cross-group I'd
resort to base-only alias-checks.

Richard.

> Thanks,
> Kyrill
>
>
>> The VUSE/VDEF handling is somewhat odd.  All stores have both
>> a VDEF and a VUSE, if you merge a set of them you can simply
>> re-use the VDEF/VUSE of one of them, effectively replacing the
>> stmt.  For all other stores you remove you miss a
>>unlink_stmt_vdef (stmt);
>>release_defs (stmt);
>> to update virtual SSA form and properly release SSA names.
>>
>> As you use TBAA in your alias checks you may only _sink_
>> stores.  Not sure if your merged store placement ensures this.
>>
>> I think this kind of transforms is useful in early optimizations, not only
>> very late as you perform it.  Of course it should be really cheap there.
>>
>> Note that options like -ftree-store-widening are discouraged
>> ("tree" does mean nothing to our users and store widening isn't
>> what is done - it's store merging).  Simply name it -fstore-merging.
>> Also the file name should be gimple-ssa-store-merging.c
>>
>> Looking forward to an algorithmically enhanced version.
>>
>> Richard.
>>
>>
>>> Thanks,
>>> Kyrill
>>>
>>> N.B. I'm going on vacation until August so I won't be able to respond to
>>> any
>>> feedback until I get back.
>>>
>>> [1] https://gcc.gnu.org/ml/gcc-patches/2009-09/msg01745.html
>>>
>>> 2016-07-15  Kyrylo Tkachov  
>>>
>>>  PR middle-end/22141
>>>  * Makefile.in (OBJS): Add tree-ssa-store-widening.o.
>>>  * common.opt (ftree-store-widening): New Optimization option.
>>>  * opts.c (default_options_table): Add entry for
>>>  OPT_ftree_store_widening.
>>>  * params.def (PARAM_STORE_WIDENING_ALLOW_UNALIGNED): Define.
>>>  * passes.def: Insert pass_tree_store_widening.
>>>  * tree-pass.h (make_pass_tree_store_widening): Declare extern
>>>  prototype.
>>>  * tree-ssa-store-widening.c: New file.
>>>  * doc/invoke.texi (Optimization Options): Document
>>>  -ftree-store-widening.
>>>
>>> 2016-07-15  Kyrylo Tkachov  
>>>  Jakub Jelinek  
>>>
>>>  PR middle-end/22141
>>>  * gcc.c-torture/execute/pr22141-1.c: New test.
>>>  * gcc.c-torture/execute/pr22141-2.c: Likewise.
>>>  * gcc.target/aarch64/ldp_stp_1.c: Adjust for -ftree

Re: [PATCH 15/17][ARM] Add tests for ARMv8.2-A FP16 support.

2016-08-04 Thread Ramana Radhakrishnan
On Mon, Jul 4, 2016 at 3:17 PM, Matthew Wahab
 wrote:
> On 17/05/16 15:48, Matthew Wahab wrote:
>> Support for using the half-precision floating point operations added by
>> the ARMv8.2-A FP16 extension is based on the macros and intrinsics added
>> to the ACLE for the extension.
>>
>> This patch adds tests to check the compiler's treatment of the ACLE
>> macros and the code generated for the new intrinsics. It does not
>> include the executable tests for the
>> gcc.target/aarch64/advsimd-intrinsics testsuite. Those are added later
>> in the patch series.
>
> Changes since the previous version are:
>
> - Fix the vsqrte/vrsqrte spelling mistake.
>
> - armv8_2-fp16-scalar-2.c: Set option -std=c11, needed to test that
>   vaddh_f16 (vmulh_f16 (a, b), c) generates a VMLA. (Options enabled
>   with the default -std=gnu11 mean that VFMA would be generated
>   otherwise.)
>
> Tested the series for arm-none-linux-gnueabihf with native bootstrap and
> make check and for arm-none-eabi and armeb-none-eabi with make check on
> an ARMv8.2-A emulator.
>
> Ok for trunk?
> Matthew

OK.

regards
Ramana


>
> testsuite/
> 2016-07-04  Matthew Wahab  
>
>
> * gcc.target/arm/armv8_2-fp16-neon-1.c: New.
> * gcc.target/arm/armv8_2-fp16-scalar-1.c: New.
> * gcc.target/arm/armv8_2-fp16-scalar-2.c: New.
> * gcc.target/arm/attr-fp16-arith-1.c: Add a test of intrinsics
> support.
>


Re: [PATCH 16/17][ARM] Add tests for VFP FP16 ACLE instrinsics.

2016-08-04 Thread Ramana Radhakrishnan
On Mon, Jul 4, 2016 at 3:18 PM, Matthew Wahab
 wrote:
> On 18/05/16 11:58, Matthew Wahab wrote:
>> On 18/05/16 02:06, Joseph Myers wrote:
>>> On Tue, 17 May 2016, Matthew Wahab wrote:
>>>
 In some tests, there are unavoidable differences in precision when
 calculating the actual and the expected results of an FP16 operation. A
 new support function CHECK_FP_BIAS is used so that these tests can check
 for an acceptable margin of error. In these tests, the tolerance is
 given as the absolute integer difference between the bitvectors of the
 expected and the actual results.
>>>
>>> As far as I can see, CHECK_FP_BIAS is only used in the following patch,
>>> but there is another bias test in vsqrth_f16_1.c in this patch.
>>
>> This is my mistake, the CHECK_FP_BIAS is used for the NEON tests and
>> should
>>  have gone into that patch. The VFP test can do a simpler check so doesn't
>> need the macro.
>>
>>> Could you clarify where the "unavoidable differences in precision" come
>>> from? Are the results of some of the new instructions not fully
>>> specified,
>>> only specified within a given precision?  (As far as I can tell the
>>> existing v8 instructions for reciprocal and reciprocal square root
>>> estimates do have fully defined results, despite being loosely described
>>> as estimates.)
>>
>> The expected results in the new tests are represented as expressions whose
>> value is expected to be calculated at compile-time. This makes the tests
>> more readable but differences in the precision between the compiler
>> and
>> the HW calculations mean that for vrecpe_f16, vrecps_f16, vrsqrts_f16 and
>> vsqrth_f16_1.c the expected and actual results are different.
>>
>> On reflection, it may be better to remove the CHECK_FP_BIAS macro and, for
>> the tests that needed it, to drop the compiler calculation and just use
>> the
>>  expected hexadecimal value.
>>
>> Other tests depending on compiler-time calculations involve relatively
>> simple arithmetic operations and it's not clear if they are susceptible to
>> the same rounding errors. I have limited knowledge in FP arithmetic though
>> so I'll look into this.
>
> The scalar tests added in this patch and the vector tests added in the
> next patch have been reworked to use the exact values for the expected
> results rather than compile-time expressions. The CHECK_FP_BIAS macro is
> not used and is removed from this patch.
>
> The intention with these tests and with the vector tests is to check
> that the compiler emits code that produces the same results as the
> instruction regardless of any optimizations that it may apply. The
> expected results for the tests were produced using inline assembler
> taking the same inputs as the intrinsics being tested.
>
> Other changes are to add and use some (limited) templates for scalar
> operations and to add progress and error reporting, making the scalar
> tests more consistent with those for the vector operations.
>
> Tested the series for arm-none-linux-gnueabihf with native bootstrap and
> make check and for arm-none-eabi and armeb-none-eabi with make check on
> an ARMv8.2-A emulator.
>
> Ok for trunk?
> Matthew
>

OK, please watch out for any fallout from the autotesters especially
with strange multilib combinations.

Ramana

> testsuite/
> 2016-07-04  Jiong Wang  
> Matthew Wahab  
>
> * gcc.target/aarch64/advsimd-intrinsics/binary_scalar_op.inc: New.
> * gcc.target/aarch64/advsimd-intrinsics/unary_scalar_op.inc: New.
> * gcc.target/aarch64/advsimd-intrinsics/ternary_scalar_op.inc: New.
>
> * gcc.target/aarch64/advsimd-intrinsics/vabsh_f16_1.c: New.
> * gcc.target/aarch64/advsimd-intrinsics/vaddh_f16_1.c: New.
> * gcc.target/aarch64/advsimd-intrinsics/vcvtah_s32_f16_1.c: New.
> * gcc.target/aarch64/advsimd-intrinsics/vcvtah_u32_f16_1.c: New.
> * gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_s32_1.c: New.
> * gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_u32_1.c: New.
> * gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_s32_1.c: New.
> * gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_u32_1.c: New.
> * gcc.target/aarch64/advsimd-intrinsics/vcvth_n_s32_f16_1.c: New.
> * gcc.target/aarch64/advsimd-intrinsics/vcvth_n_u32_f16_1.c: New.
> * gcc.target/aarch64/advsimd-intrinsics/vcvth_s32_f16_1.c: New.
> * gcc.target/aarch64/advsimd-intrinsics/vcvth_u32_f16_1.c: New.
> * gcc.target/aarch64/advsimd-intrinsics/vcvtmh_s32_f16_1.c: New.
> * gcc.target/aarch64/advsimd-intrinsics/vcvtmh_u32_f16_1.c: New.
> * gcc.target/aarch64/advsimd-intrinsics/vcvtnh_s32_f16_1.c: New.
> * gcc.target/aarch64/advsimd-intrinsics/vcvtnh_u32_f16_1.c: New.
> * gcc.target/aarch64/advsimd-intrinsics/vcvtph_s32_f16_1.c: New.
> * gcc.target/aarch64/advsimd-intrinsics/vcvtph_u32_f16_1.c: New.
> * gcc.target/aarch64/advsimd-intrinsics/vdi

Re: fix fallout of pr22051-2.c on arm

2016-08-04 Thread Matthew Wahab

On 03/08/16 23:08, Prathamesh Kulkarni wrote:

Hi,
The attached patch fixes pr22051-2.c which regressed due to
r238754. Matthew, could you please confirm if this patch fixes the
test-case for you ?



Confirmed. (Tested with arm-none-linux-gnueabihf.)

Thanks
Matthew


Re: [PATCH GCC]Simplify interface for simplify_using_initial_conditions

2016-08-04 Thread Bin.Cheng
On Wed, Aug 3, 2016 at 11:17 PM, Jeff Law  wrote:
> On 08/03/2016 10:35 AM, Bin Cheng wrote:
>>
>> Hi,
>> When I introduced parameter STOP for expand_simple_operations, I also
>> added it for simplify_using_initial_conditions.  The STOP argument is also
>> passed to simplify_using_initial_conditions in
>> simple_iv_with_niters/loop_exits_before_overflow.  After analyzing case
>> reported by PR72772, I think STOP expanding is only needed for
>> expand_simple_operations when handling IV.step in tree-ssa-loop-ivopts.c.
>> For other cases like calls to simplify_using_initial_condition, both cond
>> and expr should be expanded to check tree expression equality.  This patch
>> does so.  It simplifies interface by removing parameter STOP, also moves
>> expand_simple_operations from tree_simplify_using_condition_1 to its caller.
>>
>> Bootstrap and test on x86_64 and AArch64.  Is it OK?
>>
>> Thanks,
>> bin
>>
>> 2016-08-02  Bin Cheng  
>>
>> PR tree-optimization/72772
>> * tree-ssa-loop-niter.h (simplify_using_initial_conditions):
>> Delete
>> parameter STOP.
>> * tree-ssa-loop-niter.c (tree_simplify_using_condition_1): Delete
>> parameter STOP and update calls.  Move expand_simple_operations
>> function call from here...
>> (simplify_using_initial_conditions): ...to here.  Delete parameter
>> STOP.
>> (tree_simplify_using_condition): Delete parameter STOP.
>> * tree-scalar-evolution.c (simple_iv_with_niters): Update call to
>> simplify_using_initial_conditions.
>>
> OK.
> jeff

Thanks for reviewing.  Now I have a question about behavior of the
interface.  Although by expanding both cond and expr, this patch
catches more equality cases, it always returns expanded expr even if it's
not simplified, while the original behavior only returns simplified
expr (not expanded).  For most use cases, it doesn't matter because we
only care if the simplified result is TRUE or FALSE, but in
computation of niter->assumption and niter->may_be_zero, we may result
in different (expanded) expressions.  Not sure how much this
difference matters.  I can work on another version patch keeping the
old behavior if it is worth keeping.

Thanks,
bin


Re: [RFC] ipa bitwise constant propagation

2016-08-04 Thread Prathamesh Kulkarni
On 4 August 2016 at 13:31, Richard Biener  wrote:
> On Thu, 4 Aug 2016, Prathamesh Kulkarni wrote:
>
>> Hi,
>> This is a prototype patch for propagating known/unknown bits 
>> inter-procedurally.
>> for integral types which propagates info obtained from get_nonzero_bits ().
>>
>> Patch required making following changes:
>> a) To make info from get_nonzero_bits() available to ipa, I had to remove
>> guard !nonzero_p in ccp_finalize. However that triggered the following ICE
>> in get_ptr_info() for default_none.f95 (and several other fortran tests)
>> with options: -fopenacc -O2
>> ICE: http://pastebin.com/KjD7HMQi
>> I confirmed with Richard that this was a latent issue.
>
> Can you please bootstrap/test the fix for this separately?  (doesn't
> seem to be included in this patch btw)
Well I don't have the fix available -;)
>
>> b) I chose widest_int for representing value, mask in ipcp_bits_lattice
>> and correspondingly changed declarations for
>> bit_value_unop_1/bit_value_binop_1 to take
>> precision and sign instead of type (those are the only two fields that
>> were used). Both these functions are exported by tree-ssa-ccp.h
>> I hope that's ok ?
>
> That's ok, but please change the functions to overloads of
> bit_value_binop / bit_value_unop to not export ugly _1 names.
>
> -  signop sgn = TYPE_SIGN (type);
> -  int width = TYPE_PRECISION (type);
> +  signop sgn = type_sgn;
> +  int width = (int) type_precision;
>
> please adjust parameter names to get rid of those now unnecessary
> locals (and make the precision parameter an 'int').
>
>> c) Changed streamer_read_wi/streamer_write_wi to non-static.
>> Ah I see Kugan has submitted a patch for this, so I will drop this hunk.
>
> But he streams wide_int, not widest_int.  I followed up on his
> patch.
Oops, I got confused, sorry about that.
>
>> d) We have following in tree-ssa-ccp.c:get_default_value ():
>>   if (flag_tree_bit_ccp)
>> {
>>   wide_int nonzero_bits = get_nonzero_bits (var);
>>   if (nonzero_bits != -1)
>> {
>>   val.lattice_val = CONSTANT;
>>   val.value = build_zero_cst (TREE_TYPE (var));
>>   val.mask = extend_mask (nonzero_bits);
>> }
>>
>> extend_mask() sets all upper bits to 1 in nonzero_bits, ie, varying
>> in terms of bit-ccp.
>> I suppose in tree-ccp we need to extend mask if var is parameter since we 
>> don't
>> know in advance what values it will receive from different callers and mark 
>> all
>> upper bits as 1 to be safe.
>
> Not sure, it seems to me that we can zero-extend for unsigned types
> and sign-extend for signed types (if the "sign"-bit of nonzero_bits
> is one it properly makes higher bits undefined).  Can you change
> the code accordingly?  (simply give extend_mask a sign-op and use
> that appropriately?)  Please split out this change so it can be
> tested separately.
>
>> However I suppose with ipa, we can determine exactly which bits of
>> parameter are constant and
>> setting all upper bits to 1 will become unnecessary ?
>>
>> For example, consider following artificial test-case:
>> int f(int x)
>> {
>>   if (x > 300)
>> return 1;
>>   else
>> return 2;
>> }
>>
>> int main(int argc, char **argv)
>> {
>>   return f(argc & 0xc) + f (argc & 0x3);
>> }
>>
>> For x, the mask would be meet of:
>> <0, 0xc> meet <0, 0x3> == (0x3 | 0xc) | (0 ^ 0) == 0xf
>> and ipcp_update_bits() sets nonzero_bits for x to 0xf.
>> However get_default_value then calls extend_mask (0xf), resulting in
>> all upper bits
>> being set to 1 and consequently the condition if (x > 300) doesn't get 
>> folded.
>
> But then why would the code trying to optimize the comparison look at
> bits that are outside of the precision?  (where do we try to use this
> info?  I see that VRP misses to use nonzero bits if no range info
> is present - I suppose set_nonzero_bits misses to eventually adjust
> the range.
>
> That said, where is the folding code and why does it care for those
> "uninteresting" bits at all?
Well there is following in bit_value_binop_1 for case LT_EXPR / LE_EXPR:
/* If the most significant bits are not known we know nothing.  */
if (wi::neg_p (o1mask) || wi::neg_p (o2mask))
  break;

IIUC extend_mask extends all upper bits to 1, and we hit break and
thus not perform folding.
ccp2 dump shows:
Folding statement: if (x_2(D) > 300)
which is likely CONSTANT
Not folded

Instead if we extend based on signop, then the condition gets folded correctly:
Folding statement: if (x_2(D) > 300)
which is likely CONSTANT
Folding predicate x_2(D) > 300 to 0
gimple_simplified to if (0 != 0)
Folded into: if (0 != 0)

I thought it was unsafe for ccp to extend based on sign-op,
so I guarded that on DECL_SET_BY_IPA.
I will try to change extend_mask to extend the mask based on signop
and get rid of the flag.

I will address your other comments in follow-up patch.

Thanks,
Prathamesh
>
>> To resolve this, I added 

Re: [PATCH 17/17][ARM] Add tests for NEON FP16 ACLE intrinsics.

2016-08-04 Thread Ramana Radhakrishnan
On Mon, Jul 4, 2016 at 3:22 PM, Matthew Wahab
 wrote:
> On 17/05/16 15:52, Matthew Wahab wrote:
>> Support for using the half-precision floating point operations added by
>> the
>> ARMv8.2-A FP16 extension is based on the macros and intrinsics added to
>> the
>> ACLE for the extension.
>>
>> This patch adds executable tests for the ACLE Adv.SIMD (NEON) intrinsics
>> to
>> the advsimd-intrinsics testsuite.
>
> The tests added in the previous version of the patch, which only tested
> the f16 variants of intrinsics, are dropped. Instead, this patch extends
> the existing intrinsics tests to support the new f16 variants. Where the
> intrinsic is new, a new test for the intrinsic is added with f16 as the
> only variant. (This is consistent with existing practice, e.g vcvt.c.)
> The new tests are based on similar existing tests, e.g. maxnm_1.c is
> derived from max.c and the vcvt{a,m,p}_1.c tests, via vcvtX.inc, are
> based on vcvt.c.
>
> Since they are only available when the FP16 arithmetic instructions are
> enabled, advsimd-intrinsics.exp is updated to set -march=armv8.2-a+fp16 when
> the hardware supports it and the tests for the f16 intrinsics are guarded
> with __ARM_FEATURE_FP16_VECTOR_ARITHMETIC. Where a test has only f16
> variants, the test file itself is also guarded with
> dg-require-effective-target arm_v8_2a_fp16_neon_hw so that it reports
> UNSUPPORTED rather than PASS if FP16 isn't supported.
>
> Tested the series for arm-none-linux-gnueabihf with native bootstrap and
> make check and for arm-none-eabi and armeb-none-eabi with make check on
> an ARMv8.2-A emulator. Also tested the advsimd-intrinsics tests
> cross-compiled
> for aarch64-none-elf on an ARMv8.2-A emulator.
>
> Ok for trunk?

OK.

Thanks,
Ramana
> Matthew
>
> testsuite/
> 2016-07-04  Matthew Wahab  
>
> * gcc.target/advsimd-intrinsics/advsimd-intrinsics.exp: Enable
> -march=armv8.2-a+fp16 when supported by the hardware.
> * gcc.target/aarch64/advsimd-intrinsics/binary_op_float.inc: New.
> * gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc:
> Add F16 tests, enabled if macro HAS_FLOAT16_VARIANT is defined.  Add
> semi-colons to macro invocations.
> * gcc.target/aarch64/advsimd-intrinsics/cmp_fp_op.inc: Add F16
> tests, enabled if macro __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is
> defined.
> * gcc.target/aarch64/advsimd-intrinsics/cmp_op.inc: Likewise.
> * gcc.target/aarch64/advsimd-intrinsics/cmp_zero_op.inc: New.
> * gcc.target/gcc.target/aarch64/advsimd-intrinsics/vabd.c: Add F16
> tests, enabled if macro __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is
> defined.
> * gcc.target/gcc.target/aarch64/advsimd-intrinsics/vabs.c: Likewise.
> * gcc.target/gcc.target/aarch64/advsimd-intrinsics/vadd.c: Likewise.
> * gcc.target/gcc.target/aarch64/advsimd-intrinsics/vcage.c:
> Likewise.
> * gcc.target/gcc.target/aarch64/advsimd-intrinsics/vcagt.c:
> Likewise.
> * gcc.target/gcc.target/aarch64/advsimd-intrinsics/vcale.c:
> Likewise.
> * gcc.target/gcc.target/aarch64/advsimd-intrinsics/vcalt.c:
> Likewise.
> * gcc.target/gcc.target/aarch64/advsimd-intrinsics/vceq.c: Likewise.
> * gcc.target/aarch64/advsimd-intrinsics/vceqz_1.c: New.
> * gcc.target/gcc.target/aarch64/advsimd-intrinsics/vcge.c: Add F16
> tests, enabled if macro __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is
> defined.
> * gcc.target/aarch64/advsimd-intrinsics/vcgez_1.c: New.
> * gcc.target/gcc.target/aarch64/advsimd-intrinsics/vcgt.c: Add F16
> tests, enabled if macro __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is
> defined.
> * gcc.target/aarch64/advsimd-intrinsics/vcgtz_1.c: New.
> * gcc.target/gcc.target/aarch64/advsimd-intrinsics/vcle.c: Add F16
> tests, enabled if macro __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is
> defined.
> * gcc.target/aarch64/advsimd-intrinsics/vclez_1.c: New.
> * gcc.target/gcc.target/aarch64/advsimd-intrinsics/vclt.c: Add F16
> tests, enabled if macro __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is
> defined.
> * gcc.target/aarch64/advsimd-intrinsics/vcltz_1.c: New.
> * gcc.target/gcc.target/aarch64/advsimd-intrinsics/vcvt.c: Add F16
> tests, enabled if macro __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is
> defined.  Also fix some white-space.
> * gcc.target/aarch64/advsimd-intrinsics/vcvtX.inc: New.
> * gcc.target/aarch64/advsimd-intrinsics/vcvta_1.c: New.
> * gcc.target/aarch64/advsimd-intrinsics/vcvtm_1.c: New.
> * gcc.target/aarch64/advsimd-intrinsics/vcvtp_1.c: New.
> * gcc.target/gcc.target/aarch64/advsimd-intrinsics/vfma.c: Add F16
> tests, enabled if macro __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is
> defined.  Also fix some long lines and white-space.
> * gcc.target/gcc.target/aarch64/advsimd-intrinsics/

Re: [RFC] ipa bitwise constant propagation

2016-08-04 Thread kugan



On 04/08/16 18:57, Prathamesh Kulkarni wrote:

On 4 August 2016 at 13:31, Richard Biener  wrote:

On Thu, 4 Aug 2016, Prathamesh Kulkarni wrote:


Hi,
This is a prototype patch for propagating known/unknown bits inter-procedurally.
for integral types which propagates info obtained from get_nonzero_bits ().

Patch required making following changes:
a) To make info from get_nonzero_bits() available to ipa, I had to remove
guard !nonzero_p in ccp_finalize. However that triggered the following ICE
in get_ptr_info() for default_none.f95 (and several other fortran tests)
with options: -fopenacc -O2
ICE: http://pastebin.com/KjD7HMQi
I confirmed with Richard that this was a latent issue.


Can you please bootstrap/test the fix for this separately?  (doesn't
seem to be included in this patch btw)

Well I don't have the fix available -;)


This looks like what I fixed in 
https://patchwork.ozlabs.org/patch/648662/. I will commit that soon.


Thanks,
Kugan


Re: [LTO] Add wide_int streaming support

2016-08-04 Thread Jan Hubicka
> Hi Richard,
> 
> Thanks for the review.
> 
> On 04/08/16 17:26, Richard Biener wrote:
> >On Thu, Aug 4, 2016 at 6:12 AM, kugan  
> >wrote:
> >>Hi,
> >>
> >>During IPA-VRP implementation, I realized that we don't support streaming
> >>wide_int in LTO. Attached patch does this. Tested with IPA-VRP. Is this OK
> >>for trunk if bootstrap and regression testing is fine.
> >
> >Hmm, those functions belong to data-streamer-{in,out}.c and data-streamer.h
> >and should be named streamer_write_wide_int / streamer_read_wide_int.
> >
> >Note that we already have (non-exported) streamer_write_wi / streamer_read_wi
> >which operate on widest_ints.  Those also reside in lto-streamer-{in,out}.c 
> >and
> >should be moved to data-streamer.h (and be renamed to
> >streamer_write_widest_int).
> 
> I have now streamer_write_wide_int and streamer_write_widest_int.
> Similarly for reading. There is a lot of similarity. I am not very
> familiar with wide_int so kept it that way. Is this OK now?
> 
> Thanks,
> Kugan
> 
> gcc/ChangeLog:
> 
> 2016-08-04  Kugan Vivekanandarajah  
> 
>   * data-streamer-in.c (streamer_read_wide_int): New.
>   (streamer_read_widest_int): Renamed function.
>   * data-streamer-out.c (streamer_write_wide_int): New
>   (streamer_write_widest_int): Renamed function.

I wondered, given we do C++ now, if we don't want to just have
stream_in/stream_out member functions for our classes and/or use just one
function name for all of them so one does not need to look up somewhat
irregular function names.

I find the LTO streaming API very hard to memorize and use without constantly
looking up existing code.

Honza


Re: [AArch64] Handle HFAs of float16 types properly

2016-08-04 Thread James Greenhalgh
On Tue, Jul 26, 2016 at 02:55:02PM +0100, James Greenhalgh wrote:
> 
> Hi,
> 
> It looks like we've not been handling structures of 16-bit floating-point
> data correctly for AArch64. For some reason we end up passing them
> packed in to integer registers. That is to say, on trunk and GCC 6, for:
> 
>   struct x {
> __fp16 x[4];
>   };
> 
>   __fp16
>   foo1 (struct x x)
>   {
> return x.x[1];
>   }
> 
> We generate:
> 
>   foo1:
>   sbfxx0, x0, 16, 16
>   mov v0.h[0], w0
>   ret
> 
> Which is wrong.
> 
> This patch fixes that, so now we generate:
> 
>   foo1:
>   umovw0, v1.h[0]
>   sxthx0, w0
>   mov v0.h[0], w0
>   ret
> 
> Far from optimal (I'll work on that...) but at least getting the data from
> the right register bank!
> 
> To do this we need to keep around a reference to the fp16 type after we
> construct it. I've moved this initialisation to a new function
> aarch64_init_fp16_types in aarch64-builtins.c and made the references
> available through arm_neon.h.
> 
> After that, we want to remove the #if 0 wrapping HFmode support in
> aarch64_gimplify_va_arg_expr in aarch64.c, and add HFmode to the
> REAL_TYPE and COMPLEX_TYPE support in aapcs_vfp_sub_candidate.
> 
> Strictly speaking, we don't need the hunk regarding COMPLEX_TYPE.
> We can't build complex forms of __fp16. But, were we ever to support the
> _Float16 type we'd need this. Rather than leave the chance it will be
> forgotten about, I've just added it here. If the maintainers would prefer,
> I can change this to a TODO and put a sticky-note somewhere near my desk.
> 
> With those simple changes, we fix the argument passing. The rest of the
> patch is an update to the various testcases in aapcs64.exp to fully cover
> various __fp16 cases (both naked, and within an HFA).
> 
> Bootstrapped on aarch64-none-linux-gnu and tested with no issues. Also
> tested on aarch64_be-none-elf. All tests came back clean.
> 
> OK? As this is an ABI break, I'm not proposing for it to go back to GCC 6,
> though it will apply cleanly there if the maintainers support that.

*Ping*

https://gcc.gnu.org/ml/gcc-patches/2016-07/msg01720.html

Thanks,
James

> 
> gcc/
> 
> 2016-07-26  James Greenhalgh  
> 
>   * config/aarch64/aarch64.h (aarch64_fp16_type_node): Declare.
>   (aarch64_fp16_ptr_type_node): Likewise.
>   * config/aarch64/aarch64-simd-builtins.c
>   (aarch64_fp16_ptr_type_node): Define.
>   (aarch64_init_fp16_types): New, refactored out of...
>   (aarch64_init_builtins): ...here, update to call
>   aarch64_init_fp16_types.
>   * config/aarch64/aarch64.c (aarch64_gimplify_va_arg_expr): Handle
>   HFmode.
>   (aapcs_vfp_sub_candidate): Likewise.
> 
> gcc/testsuite/
> 
> 2016-07-26  James Greenhalgh  
> 
>   * gcc.target/aarch64/aapcs64/abitest-common.h: Define half-precision
>   registers.
>   * gcc.target/aarch64/aapcs64/abitest.S (dumpregs): Add assembly for
>   saving the half-precision registers.
>   * gcc.target/aarch64/aapcs64/func-ret-1.c: Test that an __fp16
>   value is returned in h0.
>   * gcc.target/aarch64/aapcs64/test_2.c: Check that __FP16 arguments
>   are passed in FP/SIMD registers.
>   * gcc.target/aarch64/aapcs64/test_27.c: New, test that __fp16 HFA
>   passing works correctly.
>   * gcc.target/aarch64/aapcs64/type-def.h (hfa_f16x1_t): New.
>   (hfa_f16x2_t): Likewise.
>   (hfa_f16x3_t): Likewise.
>   * gcc.target/aarch64/aapcs64/va_arg-1.c: Check that __fp16 values
>   are promoted to double and passed in a double register.
>   * gcc.target/aarch64/aapcs64/va_arg-2.c: Check that __fp16 values
>   are promoted to double and stacked.
>   * gcc.target/aarch64/aapcs64/va_arg-4.c: Check stacking of HFA of
>   __fp16 data types.
>   * gcc.target/aarch64/aapcs64/va_arg-5.c: Likewise.
>   * gcc.target/aarch64/aapcs64/va_arg-16.c: New, check HFAs of
>   __fp16 first get passed in FP/SIMD registers, then stacked.
> 




[PATCH 1/2] Fix GNU coding style in gcov.c

2016-08-04 Thread Martin Liška
On 08/03/2016 04:22 PM, Nathan Sidwell wrote:
> Martin,
>> As I've been going through all PRs related to gcov-profile, I've noticed this PR.
>> Current implementation of cycle detection in gcov is very poor, leading to 
>> extreme run time
>> for cases like mentioned in the PR (which does not contain a cycle). Thanks 
>> to Joshua, I've
>> grabbed his patch and removed the scaffolding (classes: Arc, Block, ...) he 
>> did. After doing that
>> the patch is quite subtle and fast (of course).
> 
> sorry to be a pain, but could you split the patch into
> a) formatting changes
> b) the clever  bits
> 
> the formatting changes can then (probably) be applied as obvious.
> 
> nathan

That's all right, it's my mistake that I messed up coding style issues and
core of that patch.

Martin

>From d3ec7fc18df43ffcb39e1c9b9aacc269bd641ab5 Mon Sep 17 00:00:00 2001
From: marxin 
Date: Thu, 4 Aug 2016 12:30:37 +0200
Subject: [PATCH 1/2] Fix GNU coding style in gcov.c

gcc/ChangeLog:

2016-08-04  Martin Liska  

	* gcov.c (main): Fix GNU coding style.
	(output_intermediate_file): Likewise.
	(process_file): Likewise.
	(generate_results): Likewise.
	(release_structures): Likewise.
	(create_file_names): Likewise.
	(find_source): Likewise.
	(read_graph_file): Likewise.
	(find_exception_blocks): Likewise.
	(canonicalize_name): Likewise.
	(make_gcov_file_name): Likewise.
	(mangle_name): Likewise.
	(accumulate_line_counts): Likewise.
	(output_branch_count): Likewise.
	(read_line): Likewise.
---
 gcc/gcov.c | 88 ++
 1 file changed, 42 insertions(+), 46 deletions(-)

diff --git a/gcc/gcov.c b/gcc/gcov.c
index 417b4f4..40701a1 100644
--- a/gcc/gcov.c
+++ b/gcc/gcov.c
@@ -435,7 +435,7 @@ main (int argc, char **argv)
   names = XNEWVEC (name_map_t, a_names);
   a_sources = 10;
   sources = XNEWVEC (source_t, a_sources);
-  
+
   argno = process_args (argc, argv);
   if (optind == argc)
 print_usage (true);
@@ -444,12 +444,12 @@ main (int argc, char **argv)
 multiple_files = 1;
 
   first_arg = argno;
-  
+
   for (; argno != argc; argno++)
 {
   if (flag_display_progress)
-printf ("Processing file %d out of %d\n",
-		argno - first_arg + 1, argc - first_arg);
+	printf ("Processing file %d out of %d\n", argno - first_arg + 1,
+		argc - first_arg);
   process_file (argv[argno]);
 }
 
@@ -671,8 +671,8 @@ output_intermediate_file (FILE *gcov_file, source_t *src)
 {
   /* function:,, */
   fprintf (gcov_file, "function:%d,%s,%s\n", fn->line,
-   format_gcov (fn->blocks[0].count, 0, -1),
-   flag_demangled_names ? fn->demangled_name : fn->name);
+	   format_gcov (fn->blocks[0].count, 0, -1),
+	   flag_demangled_names ? fn->demangled_name : fn->name);
 }
 
   for (line_num = 1, line = &src->lines[line_num];
@@ -681,8 +681,8 @@ output_intermediate_file (FILE *gcov_file, source_t *src)
 {
   arc_t *arc;
   if (line->exists)
-fprintf (gcov_file, "lcount:%u,%s\n", line_num,
- format_gcov (line->count, 0, -1));
+	fprintf (gcov_file, "lcount:%u,%s\n", line_num,
+		 format_gcov (line->count, 0, -1));
   if (flag_branches)
 for (arc = line->u.branches; arc; arc = arc->line_next)
   {
@@ -705,7 +705,6 @@ output_intermediate_file (FILE *gcov_file, source_t *src)
 }
 }
 
-
 /* Process a single input file.  */
 
 static void
@@ -717,7 +716,7 @@ process_file (const char *file_name)
   fns = read_graph_file ();
   if (!fns)
 return;
-  
+
   read_count_file (fns);
   while (fns)
 {
@@ -767,7 +766,7 @@ process_file (const char *file_name)
 	}
 	  if (line >= sources[src].num_lines)
 	sources[src].num_lines = line + 1;
-	  
+
 	  solve_flow_graph (fn);
 	  if (fn->has_catch)
 	find_exception_blocks (fn);
@@ -848,15 +847,14 @@ generate_results (const char *file_name)
   if (flag_gcov_file && flag_intermediate_format)
 {
   /* Open the intermediate file.  */
-  gcov_intermediate_filename =
-get_gcov_intermediate_filename (file_name);
+  gcov_intermediate_filename = get_gcov_intermediate_filename (file_name);
   gcov_intermediate_file = fopen (gcov_intermediate_filename, "w");
   if (!gcov_intermediate_file)
-{
-  fnotice (stderr, "Cannot open intermediate output file %s\n",
-   gcov_intermediate_filename);
-  return;
-}
+	{
+	  fnotice (stderr, "Cannot open intermediate output file %s\n",
+		   gcov_intermediate_filename);
+	  return;
+	}
 }
 
   for (ix = n_sources, src = sources; ix--; src++)
@@ -866,7 +864,7 @@ generate_results (const char *file_name)
 	  /* Ignore this source, if it is an absolute path (after
 	 source prefix removal).  */
 	  char first = src->coverage.name[0];
-  
+
 #if HAVE_DOS_BASED_FILE_SYSTEM
 	  if (first && src->coverage.name[1] == ':')
 	first = src->coverage.name[2];
@@ -874,7 +872,7 @@ generate_results (const char *file_name)

Re: [PATCH] gcov tool: Implement Hawick's algorithm for cycle detection, (PR gcov-profile/67992)

2016-08-04 Thread Martin Liška
On 08/03/2016 04:22 PM, Nathan Sidwell wrote:
> Martin,
>> As I've been going through all PRs related to gcov-profile, I've noticed this PR.
>> Current implementation of cycle detection in gcov is very poor, leading to 
>> extreme run time
>> for cases like mentioned in the PR (which does not contain a cycle). Thanks 
>> to Joshua, I've
>> grabbed his patch and removed the scaffolding (classes: Arc, Block, ...) he 
>> did. After doing that
>> the patch is quite subtle and fast (of course).
> 
> sorry to be a pain, but could you split the patch into
> a) formatting changes
> b) the clever  bits
> 
> the formatting changes can then (probably) be applied as obvious.
> 
> nathan

This is the second part, which changes the loop detection algorithm.

Martin
>From 353e469aa2ac9260f31dd09aaedfd21eebc47c02 Mon Sep 17 00:00:00 2001
From: marxin 
Date: Thu, 4 Aug 2016 12:34:08 +0200
Subject: [PATCH 2/2] gcov tool: Implement Hawick's algorithm for cycle
 detection, (PR gcov-profile/67992)

gcc/ChangeLog:

2016-08-04  Martin Liska  

	* gcov.c (line_t::has_block): New function.
	(handle_cycle): Likewise.
	(unblock): Likewise.
	(circuit): Likewise.
	(find_cycles): Likewise.
	(get_cycles_count): Likewise.
	(accumulate_line_counts): Use new loop detection algorithm.
---
 gcc/gcov.c | 287 +++--
 1 file changed, 186 insertions(+), 101 deletions(-)

diff --git a/gcc/gcov.c b/gcc/gcov.c
index 40701a1..f39a731 100644
--- a/gcc/gcov.c
+++ b/gcc/gcov.c
@@ -41,6 +41,11 @@ along with Gcov; see the file COPYING3.  If not see
 
 #include 
 
+#include 
+#include 
+
+using namespace std;
+
 #define IN_GCOV 1
 #include "gcov-io.h"
 #include "gcov-io.c"
@@ -222,6 +227,9 @@ typedef struct coverage_info
 
 typedef struct line_info
 {
+  /* Return true when NEEDLE is one of basic blocks the line belongs to.  */
+  bool has_block (block_t *needle);
+
   gcov_type count;	   /* execution count */
   union
   {
@@ -235,6 +243,16 @@ typedef struct line_info
   unsigned unexceptional : 1;
 } line_t;
 
+bool
+line_t::has_block (block_t *needle)
+{
+  for (block_t *n = u.blocks; n; n = n->chain)
+if (n == needle)
+  return true;
+
+  return false;
+}
+
 /* Describes a file mentioned in the block graph.  Contains an array
of line info.  */
 
@@ -407,6 +425,164 @@ static void release_structures (void);
 static void release_function (function_t *);
 extern int main (int, char **);
 
+/* Cycle detection!
+   There are a bajillion algorithms that do this.  Boost's function is named
+   hawick_cycles, so I used the algorithm by K. A. Hawick and H. A. James in
+   "Enumerating Circuits and Loops in Graphs with Self-Arcs and Multiple-Arcs"
+   (url at ).
+
+   The basic algorithm is simple: effectively, we're finding all simple paths
+   in a subgraph (that shrinks every iteration).  Duplicates are filtered by
+   "blocking" a path when a node is added to the path (this also prevents non-
+   simple paths)--the node is unblocked only when it participates in a cycle.
+   */
+
+/* Flag that drives cycle detection after a negative cycle is seen.  */
+static bool did_negate = false;
+
+/* Handle cycle identified by EDGES, where the function finds minimum cs_count
+   and subtract the value from all counts.  The subtracted value is added
+   to COUNT.  */
+
+static void
+handle_cycle (const vector &edges, int64_t &count)
+{
+  /* Find the minimum edge of the cycle, and reduce all nodes in the cycle by
+ that amount.  */
+  int64_t cycle_count = INT64_MAX;
+  for (unsigned i = 0; i < edges.size (); i++)
+{
+  int64_t ecount = edges[i]->cs_count;
+  if (cycle_count > ecount)
+	cycle_count = ecount;
+}
+  count += cycle_count;
+  for (unsigned i = 0; i < edges.size (); i++)
+edges[i]->cs_count -= cycle_count;
+
+  if (cycle_count < 0)
+did_negate = true;
+}
+
+/* Unblock a block U from BLOCKED.  Apart from that, iterate all blocks
+   blocked by U in BLOCK_LISTS.  */
+
+static void
+unblock (block_t *u, vector &blocked,
+	 vector > &block_lists)
+{
+  vector::iterator it = find (blocked.begin (), blocked.end (), u);
+  if (it == blocked.end ())
+return;
+
+  unsigned index = it - blocked.begin ();
+  blocked.erase (it);
+
+  for (vector::iterator it2 = block_lists[index].begin ();
+   it2 != block_lists[index].end (); it2++)
+unblock (*it2, blocked, block_lists);
+  for (unsigned j = 0; j < block_lists[index].size (); j++)
+unblock (u, blocked, block_lists);
+
+  block_lists.erase (block_lists.begin () + index);
+}
+
+/* Find circuit going to block V, PATH is provisional seen cycle.
+   BLOCKED is vector of blocked vertices, BLOCK_LISTS contains vertices
+   blocked by a block.  COUNT is accumulated count of the current LINE.  */
+
+static bool
+circuit (block_t *v, vector &path, block_t *start,
+	 vector &blocked, vector> &block_lists,
+	 line_t &linfo, int64_t &count)
+{
+  bool found = false;
+
+  /* Ad

Re: [RFC] ipa bitwise constant propagation

2016-08-04 Thread Richard Biener
On Thu, 4 Aug 2016, Prathamesh Kulkarni wrote:

> On 4 August 2016 at 13:31, Richard Biener  wrote:
> > On Thu, 4 Aug 2016, Prathamesh Kulkarni wrote:
> >
> >> Hi,
> >> This is a prototype patch for propagating known/unknown bits 
> >> inter-procedurally
> >> for integral types, which propagates info obtained from get_nonzero_bits ().
> >>
> >> Patch required making following changes:
> >> a) To make info from get_nonzero_bits() available to ipa, I had to remove
> >> guard !nonzero_p in ccp_finalize. However that triggered the following ICE
> >> in get_ptr_info() for default_none.f95 (and several other fortran tests)
> >> with options: -fopenacc -O2
> >> ICE: http://pastebin.com/KjD7HMQi
> >> I confirmed with Richard that this was a latent issue.
> >
> > Can you please bootstrap/test the fix for this separately?  (doesn't
> > seem to be included in this patch btw)
> Well I don't have the fix available -;)

Oh, I thought it was obvious:

Index: gcc/tree-inline.c
===
--- gcc/tree-inline.c   (revision 239117)
+++ gcc/tree-inline.c   (working copy)
@@ -242,7 +242,8 @@ remap_ssa_name (tree name, copy_body_dat
   SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_tree)
= SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name);
   /* At least IPA points-to info can be directly transferred.  */
-  if (id->src_cfun->gimple_df
+  if (POINTER_TYPE_P (TREE_TYPE (name))
+ && id->src_cfun->gimple_df
  && id->src_cfun->gimple_df->ipa_pta
  && (pi = SSA_NAME_PTR_INFO (name))
  && !pi->pt.anything)
@@ -274,7 +275,8 @@ remap_ssa_name (tree name, copy_body_dat
   SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_tree)
= SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name);
   /* At least IPA points-to info can be directly transferred.  */
-  if (id->src_cfun->gimple_df
+  if (POINTER_TYPE_P (TREE_TYPE (name))
+ && id->src_cfun->gimple_df
  && id->src_cfun->gimple_df->ipa_pta
  && (pi = SSA_NAME_PTR_INFO (name))
  && !pi->pt.anything)

similarly range info could be transferred of course.

> >
> >> b) I chose widest_int for representing value, mask in ipcp_bits_lattice
> >> and correspondingly changed declarations for
> >> bit_value_unop_1/bit_value_binop_1 to take
> >> precision and sign instead of type (those are the only two fields that
> >> were used). Both these functions are exported by tree-ssa-ccp.h
> >> I hope that's ok ?
> >
> > That's ok, but please change the functions to overloads of
> > bit_value_binop / bit_value_unop to not export ugly _1 names.
> >
> > -  signop sgn = TYPE_SIGN (type);
> > -  int width = TYPE_PRECISION (type);
> > +  signop sgn = type_sgn;
> > +  int width = (int) type_precision;
> >
> > please adjust parameter names to get rid of those now unnecessary
> > locals (and make the precision parameter an 'int').
> >
> >> c) Changed streamer_read_wi/streamer_write_wi to non-static.
> >> Ah I see Kugan has submitted a patch for this, so I will drop this hunk.
> >
> > But he streams wide_int, not widest_int.  I followed up on his
> > patch.
> Oops, I got confused, sorry about that.
> >
> >> d) We have following in tree-ssa-ccp.c:get_default_value ():
> >>   if (flag_tree_bit_ccp)
> >> {
> >>   wide_int nonzero_bits = get_nonzero_bits (var);
> >>   if (nonzero_bits != -1)
> >> {
> >>   val.lattice_val = CONSTANT;
> >>   val.value = build_zero_cst (TREE_TYPE (var));
> >>   val.mask = extend_mask (nonzero_bits);
> >> }
> >>
> >> extend_mask() sets all upper bits to 1 in nonzero_bits, ie, varying
> >> in terms of bit-ccp.
> >> I suppose in tree-ccp we need to extend mask if var is parameter since we 
> >> don't
> >> know in advance what values it will receive from different callers and 
> >> mark all
> >> upper bits as 1 to be safe.
> >
> > Not sure, it seems to me that we can zero-extend for unsigned types
> > and sign-extend for signed types (if the "sign"-bit of nonzero_bits
> > is one it properly makes higher bits undefined).  Can you change
> > the code accordingly?  (simply give extend_mask a sign-op and use
> > that appropriately?)  Please split out this change so it can be
> > tested separately.
> >
> >> However I suppose with ipa, we can determine exactly which bits of
> >> parameter are constant and
> >> setting all upper bits to 1 will become unnecessary ?
> >>
> >> For example, consider following artificial test-case:
> >> int f(int x)
> >> {
> >>   if (x > 300)
> >> return 1;
> >>   else
> >> return 2;
> >> }
> >>
> >> int main(int argc, char **argv)
> >> {
> >>   return f(argc & 0xc) + f (argc & 0x3);
> >> }
> >>
> >> For x, the mask would be meet of:
> >> <0, 0xc> meet <0, 0x3> == (0x3 | 0xc) | (0 ^ 0) == 0xf
> >> and ipcp_update_bits() sets nonzero_bits for x to 0xf.
> >> However get_default_value then calls extend_mask (0xf), resulting in
> >> a

[PATCH][AArch64] Add legitimize_address_displacement hook

2016-08-04 Thread Wilco Dijkstra
This patch adds a legitimize_address_displacement hook so that stack accesses
with large offsets are split into a more efficient sequence.  Byte and halfword
accesses use a 4KB range, wider accesses use a 16KB range to maximise the
available addressing range and increase opportunities to share the base address.

int f(int x)
{
  int arr[8192];
  arr[4096] = 0;
  arr[6000] = 0;
  arr[7000] = 0;
  arr[8191] = 0;
  return arr[x];
}

Now generates:

sub sp, sp, #32768
add x1, sp, 16384
str wzr, [x1]
str wzr, [x1, 7616]
str wzr, [x1, 11616]
str wzr, [x1, 16380]
ldr w0, [sp, w0, sxtw 2]
add sp, sp, 32768
ret

instead of:

sub sp, sp, #32768
mov x2, 28000
add x1, sp, 16384
mov x3, 32764
str wzr, [x1]
mov x1, 24000
add x1, sp, x1
str wzr, [x1]
add x1, sp, x2
str wzr, [x1]
add x1, sp, x3
str wzr, [x1]
ldr w0, [sp, w0, sxtw 2]
add sp, sp, 32768
ret

Bootstrap, GCC regression OK.

ChangeLog:
2016-08-04  Wilco Dijkstra  

gcc/
* config/aarch64/aarch64.c (aarch64_legitimize_address_displacement):
New function.
(TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT): Define.
--

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
a0e7680ad0946a27d95a67a9892bb7e264a90451..7bf12475494fb004f5a92445ae31fdc52af43c3b
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4132,6 +4132,19 @@ aarch64_legitimate_address_p (machine_mode mode, rtx x,
   return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
 }
 
+/* Split an out-of-range address displacement into a base and offset.
+   Use 4KB range for 1- and 2-byte accesses and a 16KB range otherwise.  */
+
+static bool
+aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode 
mode)
+{
+  HOST_WIDE_INT mask = GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3fff;
+
+  *off = GEN_INT (INTVAL (*disp) & ~mask);
+  *disp = GEN_INT (INTVAL (*disp) & mask);
+  return true;
+}
+
 /* Return TRUE if rtx X is immediate constant 0.0 */
 bool
 aarch64_float_const_zero_rtx_p (rtx x)
@@ -14096,6 +14109,10 @@ aarch64_optab_supported_p (int op, machine_mode mode1, 
machine_mode,
 #undef TARGET_LEGITIMATE_CONSTANT_P
 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
 
+#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
+#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
+  aarch64_legitimize_address_displacement
+
 #undef TARGET_LIBGCC_CMP_RETURN_MODE
 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode




[PATCH][AArch64] Improve stack adjustment

2016-08-04 Thread Wilco Dijkstra
Improve stack adjustment by reusing a temporary move immediate 
from the prolog if the register is still valid in the epilog.  This generates
smaller code for leaf functions:

mov x16, 4
sub sp, sp, x16
ldr w0, [sp, w0, sxtw 2]
add sp, sp, x16
ret

Passes GCC regression tests.

ChangeLog:
2016-08-04  Wilco Dijkstra  

gcc/
* config/aarch64/aarch64.c (aarch64_add_constant):
Add extra argument to allow emitting the move immediate.
Use add/sub with positive immediate.
(aarch64_expand_epilogue): Decide when to leave out move.

testsuite/
* gcc.target/aarch64/test_frame_17.c: New test.
--

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
ce2cc5ae3e1291f4ef4a8408461678c9397b06bd..5b59e4dd157351f301fc563a724cefe8a9be132c
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1941,15 +1941,21 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
 }
 
 /* Add DELTA to REGNUM in mode MODE.  SCRATCHREG can be used to held
-   intermediate value if necessary.
+   intermediate value if necessary.  FRAME_RELATED_P should be true if
+   the RTX_FRAME_RELATED flag should be set and CFA adjustments added
+   to the generated instructions.  If SCRATCHREG is known to hold
+   abs (delta), EMIT_MOVE_IMM can be set to false to avoid emitting the
+   immediate again.
 
-   This function is sometimes used to adjust the stack pointer, so we must
-   ensure that it can never cause transient stack deallocation by writing an
-   invalid value into REGNUM.  */
+   Since this function may be used to adjust the stack pointer, we must
+   ensure that it cannot cause transient stack deallocation (for example
+   by first incrementing SP and then decrementing when adjusting by a
+   large immediate).  */
 
 static void
 aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
- HOST_WIDE_INT delta, bool frame_related_p)
+ HOST_WIDE_INT delta, bool frame_related_p,
+ bool emit_move_imm = true)
 {
   HOST_WIDE_INT mdelta = abs_hwi (delta);
   rtx this_rtx = gen_rtx_REG (mode, regnum);
@@ -1967,11 +1973,11 @@ aarch64_add_constant (machine_mode mode, int regnum, 
int scratchreg,
   return;
 }
 
-  /* We need two add/sub instructions, each one performing part of the
- calculation.  Don't do this if the addend can be loaded into register with
- a single instruction, in that case we prefer a move to a scratch register
- following by an addition.  */
-  if (mdelta < 0x100 && !aarch64_move_imm (delta, mode))
+  /* We need two add/sub instructions, each one performing part of the
+ addition/subtraction, but don't do this if the addend can be loaded into
+ a register by a single instruction; in that case we prefer a move to a
+ scratch register followed by an addition.  */
+  if (mdelta < 0x100 && !aarch64_move_imm (mdelta, mode))
 {
   HOST_WIDE_INT low_off = mdelta & 0xfff;
 
@@ -1985,8 +1991,10 @@ aarch64_add_constant (machine_mode mode, int regnum, int 
scratchreg,
 
   /* Otherwise use generic function to handle all other situations.  */
   rtx scratch_rtx = gen_rtx_REG (mode, scratchreg);
-  aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (delta), true, mode);
-  insn = emit_insn (gen_add2_insn (this_rtx, scratch_rtx));
+  if (emit_move_imm)
+aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (mdelta), true, mode);
+  insn = emit_insn (delta < 0 ? gen_sub2_insn (this_rtx, scratch_rtx)
+ : gen_add2_insn (this_rtx, scratch_rtx));
   if (frame_related_p)
 {
   RTX_FRAME_RELATED_P (insn) = frame_related_p;
@@ -3288,7 +3296,8 @@ aarch64_expand_epilogue (bool for_sibcall)
   RTX_FRAME_RELATED_P (insn) = callee_adjust == 0;
 }
   else
-aarch64_add_constant (Pmode, SP_REGNUM, IP1_REGNUM, final_adjust, true);
+aarch64_add_constant (Pmode, SP_REGNUM, IP1_REGNUM, final_adjust, true,
+ df_regs_ever_live_p (IP1_REGNUM));
 
   aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM,
callee_adjust != 0, &cfi_ops);
@@ -3311,7 +3320,8 @@ aarch64_expand_epilogue (bool for_sibcall)
   cfi_ops = NULL;
 }
 
-  aarch64_add_constant (Pmode, SP_REGNUM, IP0_REGNUM, initial_adjust, true);
+  aarch64_add_constant (Pmode, SP_REGNUM, IP0_REGNUM, initial_adjust, true,
+   df_regs_ever_live_p (IP0_REGNUM));
 
   if (cfi_ops)
 {
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_17.c 
b/gcc/testsuite/gcc.target/aarch64/test_frame_17.c
new file mode 100644
index 
..c214431999b60cce8a75204876a8c73ec6304128
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_17.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 --save-temps" } */
+
+/* Test reuse of stack adjustment temporaries.  */

[PATCH] Simplify std::__invoke_impl definitions

2016-08-04 Thread Jonathan Wakely

Some minor tidying before I add support for is_callable and
is_nothrow_callable.

* include/std/functional (_Unwrap): Rename to __inv_unwrap.
(__invfwd): Adjust.
(__invoke_impl): Remove unused template parameters.
* testsuite/20_util/function_objects/invoke/59768.cc: Remove unused
parameter.
* testsuite/20_util/function_objects/invoke/ref_ext.cc: Copy 59768.cc
and test __invoke extension for C++11.

Tested powerpc64-linux, committed to trunk.

commit 49722e0f664d5b50d14ee6acea201d2ec46cc64d
Author: Jonathan Wakely 
Date:   Thu Aug 4 11:28:32 2016 +0100

Simplify std::__invoke_impl definitions

* include/std/functional (_Unwrap): Rename to __inv_unwrap.
(__invfwd): Adjust.
(__invoke_impl): Remove unused template parameters.
* testsuite/20_util/function_objects/invoke/59768.cc: Remove unused
parameter.
* testsuite/20_util/function_objects/invoke/ref_ext.cc: Copy 59768.cc
and test __invoke extension for C++11.

diff --git a/libstdc++-v3/include/std/functional 
b/libstdc++-v3/include/std/functional
index 700505e..d635ef5 100644
--- a/libstdc++-v3/include/std/functional
+++ b/libstdc++-v3/include/std/functional
@@ -185,31 +185,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 { };
 
   template::type>
-struct _Unwrap
+struct __inv_unwrap
 {
-  using type = _Tp&&;
-
-  // Equivalent to std::forward<_Tp>
-  static constexpr _Tp&&
-  _S_fwd(_Tp& __t) noexcept { return static_cast<_Tp&&>(__t); }
+  using type = _Tp;
 };
 
   template
-struct _Unwrap<_Tp, reference_wrapper<_Up>>
+struct __inv_unwrap<_Tp, reference_wrapper<_Up>>
 {
   using type = _Up&;
-
-  // Get an lvalue-reference from a reference_wrapper.
-  static _Up&
-  _S_fwd(const _Tp& __t) noexcept { __t.get(); }
 };
 
   // Used by __invoke_impl instead of std::forward<_Tp> so that a
   // reference_wrapper is converted to an lvalue-reference.
-  template
-inline typename _Unwrap<_Tp>::type
+  template::type>
+inline _Up&&
 __invfwd(typename remove_reference<_Tp>::type& __t) noexcept
-{ return _Unwrap<_Tp>::_S_fwd(__t); }
+{ return static_cast<_Up&&>(__t); }
 
   template
 inline _Res
@@ -235,16 +227,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   return ((*std::forward<_Tp>(__t)).*__f)(std::forward<_Args>(__args)...);
 }
 
-  template
+  template
 inline _Res
-__invoke_impl(__invoke_memobj_ref, _MemFun&& __f, _Tp&& __t)
+__invoke_impl(__invoke_memobj_ref, _MemPtr&& __f, _Tp&& __t)
 noexcept(noexcept(__invfwd<_Tp>(__t).*__f))
 { return __invfwd<_Tp>(__t).*__f; }
 
-  template
+  template
 inline _Res
-__invoke_impl(__invoke_memobj_deref, _MemFun&& __f, _Tp&& __t,
- _Args&&... __args)
+__invoke_impl(__invoke_memobj_deref, _MemPtr&& __f, _Tp&& __t)
 noexcept(noexcept((*std::forward<_Tp>(__t)).*__f))
 { return (*std::forward<_Tp>(__t)).*__f; }
 
diff --git a/libstdc++-v3/testsuite/20_util/function_objects/invoke/59768.cc 
b/libstdc++-v3/testsuite/20_util/function_objects/invoke/59768.cc
index 2a519ea..6aaae22 100644
--- a/libstdc++-v3/testsuite/20_util/function_objects/invoke/59768.cc
+++ b/libstdc++-v3/testsuite/20_util/function_objects/invoke/59768.cc
@@ -21,7 +21,7 @@
 #include 
 
 struct A {
-  void foo(int n) { }
+  void foo(int) { }
 };
 
 void
diff --git a/libstdc++-v3/testsuite/20_util/function_objects/invoke/ref_ext.cc 
b/libstdc++-v3/testsuite/20_util/function_objects/invoke/ref_ext.cc
new file mode 100644
index 000..d7e5766
--- /dev/null
+++ b/libstdc++-v3/testsuite/20_util/function_objects/invoke/ref_ext.cc
@@ -0,0 +1,58 @@
+// Copyright (C) 2015-2016 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// .
+
+// { dg-do compile { target c++11 } }
+
+#include 
+
+struct A {
+  void foo(int) { }
+};
+
+void
+test01()
+{
+  // PR libstdc++/59768
+  A a;
+  auto ref = std::ref(a);
+  std::__invoke(&A::foo, ref, 100);// lvalue
+  std::__invoke(&A::foo, std::move(ref), 100); // rvalue
+  const auto refc = std::ref(a);
+  std::__invoke(&A::foo, refc, 100);   // const lvalue
+  std::__invoke(&A::foo, std::move(refc), 100);// const rvalue
+}
+
+struct B {
+  int bar = 0;
+};
+
+void

[PATCH][AArch64] Simplify eh_return implementation

2016-08-04 Thread Wilco Dijkstra
This patch simplifies the handling of the EH return value.  We force the use of 
the
frame pointer so the return location is always at FP + 8.  This means we can 
emit
a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
patterns, splitters and frame offset calculations.  The new implementation also
fixes various bugs in aarch64_final_eh_return_addr, which does not work with
-fomit-frame-pointer, alloca or outgoing arguments.

Bootstrap OK, GCC Regression OK, OK for trunk? Would it be useful to backport
this to GCC6.x?

ChangeLog:
2016-08-04  Wilco Dijkstra  

gcc/
* config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
* config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
(EH_RETURN_HANDLER_RTX): New define.
* config/aarch64/aarch64.c (aarch64_frame_pointer_required):
Force frame pointer in EH return functions.
(aarch64_final_eh_return_addr): Remove.
(aarch64_eh_return_handler_rtx): New function.
* config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
Remove.
(aarch64_eh_return_handler_rtx): New prototype.

--
diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 
3cdd69b8af1089a839e5d45cda94bc70a15cd777..327c0a97f6f687604afef249b79ac22628418070
 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -358,7 +358,7 @@ int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
 int aarch64_hard_regno_nregs (unsigned, machine_mode);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
 int aarch64_vec_fpconst_pow_of_2 (rtx);
-rtx aarch64_final_eh_return_addr (void);
+rtx aarch64_eh_return_handler_rtx (void);
 rtx aarch64_mask_from_zextract_ops (rtx, rtx);
 const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr (int, rtx);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 
003fec87e41db618570663f28cc2387a87e8252a..fa81e4b853daf08842955288861ec7e7acca
 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -400,9 +400,9 @@ extern unsigned aarch64_architecture_version;
 #define ASM_DECLARE_FUNCTION_NAME(STR, NAME, DECL) \
   aarch64_declare_function_name (STR, NAME, DECL)
 
-/* The register that holds the return address in exception handlers.  */
-#define AARCH64_EH_STACKADJ_REGNUM (R0_REGNUM + 4)
-#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, AARCH64_EH_STACKADJ_REGNUM)
+/* For EH returns X4 contains the stack adjustment.  */
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R4_REGNUM)
+#define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
 
 /* Don't use __builtin_setjmp until we've defined it.  */
 #undef DONT_USE_BUILTIN_SETJMP
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
405a75914ceed81c6bbbe1384fce815f6d673d6c..f951323586742a6313b5bb345252ce4b4d10debd
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2718,6 +2718,10 @@ aarch64_frame_pointer_required (void)
   && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
 return true;
 
+  /* Force a frame pointer for EH returns so the return address is at FP+8.  */
+  if (crtl->calls_eh_return)
+return true;
+
   return false;
 }
 
@@ -3348,52 +3352,15 @@ aarch64_expand_epilogue (bool for_sibcall)
 emit_jump_insn (ret_rtx);
 }
 
-/* Return the place to copy the exception unwinding return address to.
-   This will probably be a stack slot, but could (in theory be the
-   return register).  */
+/* Implement EH_RETURN_HANDLER_RTX.  The return address is stored at FP + 8.
+   The access needs to be volatile to prevent it from being removed.  */
 rtx
-aarch64_final_eh_return_addr (void)
+aarch64_eh_return_handler_rtx (void)
 {
-  HOST_WIDE_INT fp_offset;
-
-  aarch64_layout_frame ();
-
-  fp_offset = cfun->machine->frame.frame_size
- - cfun->machine->frame.hard_fp_offset;
-
-  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
-return gen_rtx_REG (DImode, LR_REGNUM);
-
-  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
- result in a store to save LR introduced by builtin_eh_return () being
- incorrectly deleted because the alias is not detected.
- So in the calculation of the address to copy the exception unwinding
- return address to, we note 2 cases.
- If FP is needed and the fp_offset is 0, it means that SP = FP and hence
- we return a SP-relative location since all the addresses are SP-relative
- in this case.  This prevents the store from being optimized away.
- If the fp_offset is not 0, then the addresses will be FP-relative and
- therefore we return a FP-relative location.  */
-
-  if (frame_pointer_needed)
-{
-  if (fp_offset)
-return gen_frame_mem (DImode,
- plus_constant (Pmode, hard_frame_pointer_rtx, 
UNITS_PER_WORD));
-  else
-return 

protected alloca class for malloc fallback

2016-08-04 Thread Aldy Hernandez

Howdy!

As part of my -Walloca-larger-than=life work, I've been running said 
pass over gcc, binutils, and gdb, and trying to fix things along the way.


Particularly irritating and error prone is having to free malloc'd 
pointers on every function exit point.  We end up with a lot of:


foo(size_t len)
{
  void *p, *m_p = NULL;
  if (len < HUGE)
p = alloca(len);
  else
p = m_p = malloc(len);
  if (something)
goto out;
  stuff();
out:
  free (m_p);
}

...which nobody really likes.

I've been thinking that for GCC we could have a protected_alloca class 
whose destructor frees any malloc'd memory:


void foo()
{
  char *p;
  protected_alloca chunk(5);
  p = (char *) chunk.pointer();
  f(p);
}

This would generate:

void foo() ()
{
  void * _3;

  :
  _3 = malloc (5);
  f (_3);

  :
  free (_3); [tail call]
  return;
}

Now the problem with this is that the memory allocated by chunk is freed 
when it goes out of scope, which may not be what you want.  For example:


 func()
 {
   char *str;
   {
 protected_alloca chunk ();
 // malloc'd pointer will be freed when chunk goes out of scope.
 str = (char *) chunk.pointer ();
   }
   use (str);  // BAD!  Use after free.
 }

In the attached patch implementing this class I have provided another 
idiom for avoiding this problem:


 func()
 {
   char *str;
   protected_alloca chunk;
   {
 chunk.alloc (999);
 str = (char *) chunk.pointer ();
   }
   // OK, pointer will be freed on function exit.
   use (str);
 }

So I guess it's between annoying gotos and keeping track of multiple 
exit points to a function previously calling alloca, or making sure the 
protected_alloca object always resides in the scope where the memory is 
going to be used.


Is there a better blessed C++ way?  If not, is this OK?

Included is the conversion of tree.c.  More to follow once we agree on a 
solution.


Tested on x86-64 Linux.

Aldy
commit fd0078ef60dd75ab488392e0e05b28f27d971bdf
Author: Aldy Hernandez 
Date:   Thu Aug 4 06:43:37 2016 -0400

* protected-alloca.h: New.
* tree.c (get_file_function_name): Use protected_alloca.
(tree_check_failed): Same.
(tree_not_check_failed): Same.
(tree_range_check_failed): Same.
(omp_clause_range_check_failed): Same.

diff --git a/gcc/protected-alloca.h b/gcc/protected-alloca.h
new file mode 100644
index 000..62f2a7b
--- /dev/null
+++ b/gcc/protected-alloca.h
@@ -0,0 +1,113 @@
+/* Alloca wrapper with a malloc fallback.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   Contributed by Aldy Hernandez .
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef _PROTECTED_ALLOCA_H_
+#define _PROTECTED_ALLOCA_H_
+
+#ifndef MAX_ALLOCA_SIZE
+#define MAX_ALLOCA_SIZE 4096
+#endif
+
+#ifdef __GNUC__
+#define _ALLOCA_INLINE_ __attribute__((always_inline))
+#else
+#define _ALLOCA_INLINE_
+#endif
+
+/* This is a wrapper class for alloca that falls back to malloc if the
+   allocation size is > MAX_ALLOCA_SIZE.  It is meant to replace:
+
+ char *str = (char *) alloca (N);
+
+   by this:
+
+ protected_alloca chunk (N);
+ char *str = (char *) chunk.pointer ();
+
+   or this:
+
+ protected_alloca chunk;
+ chunk.alloc (N);
+ char *str = (char *) chunk.pointer ();
+
+   If N > MAX_ALLOCA_SIZE, malloc is used, and whenever `chunk' goes
+   out of scope, the malloc'd memory will be freed.  Keep in mind that
+   the malloc'd memory gets freed when `chunk' goes out of scope, and
+   may be freed earlier than expected.  For example:
+
+ func()
+ {
+   char *str;
+   {
+ protected_alloca chunk ();
+// If malloc'd, pointer will be freed when chunk goes out of scope.
+ str = (char *) chunk.pointer ();
+   }
+   use (str);  // BAD!  Use after free.
+ }
+
+   In this case, it is best to use the following idiom:
+
+ func()
+ {
+   char *str;
+   protected_alloca chunk;
+   {
+ chunk.alloc (999);
+str = (char *) chunk.pointer ();
+   }
+   // OK, pointer will be freed on function exit.
+   use (str);
+ }
+*/
+
+class protected_alloca {
+  void *p;
+  bool on_heap;
+public:
+  // GCC will refuse to inline a function that calls alloca unless
+  // `always_inline' is used, for

Re: [PATCH 1/2] Fix GNU coding style in gcov.c

2016-08-04 Thread Nathan Sidwell

On 08/04/16 06:39, Martin Liška wrote:

On 08/03/2016 04:22 PM, Nathan Sidwell wrote:

Martin,

As I've been going through all PRs related to gcov-profile, I've noticed this PR.
Current implementation of cycle detection in gcov is very poor, leading to 
extreme run time
for cases like the one mentioned in the PR (which does not contain a cycle). Thanks to 
Joshua, I've
grabbed his patch and removed the scaffolding (classes: Arc, Block, ...) he 
did. After doing that
the patch is quite subtle and fast (of course).


sorry to be a pain, but could you split the patch into
a) formatting changes
b) the clever  bits

the formatting changes can then (probably) be applied as obvious.

nathan


That's all right, it's my mistake that I messed up coding style issues and
core of that patch.


Thanks.  the formatting patch is fine.  Reading the other one next ...

nathan



Re: libgo patch committed: Update to 1.7rc3

2016-08-04 Thread Rainer Orth
Hi Uros,

>> I have committed a patch to update libgo to the 1.7rc3 release
>> candidate.  This is very close to the upcoming 1.7 release.  As usual
>> with libgo updates, the patch is too large to include in this e-mail
>> message.  I've appended the changes to the gccgo-specific directories.
>
> There is an issue with
>
> libgo/go/crypto/sha1/issue15617_test.go.
>
> The test crypto/sha1 fails on alpha-linux-gnu with:
>
> --- FAIL: TestOutOfBoundsRead (0.00s)
> panic: invalid argument [recovered]
> panic: invalid argument
> ...
>
> since the test hard-codes 4k pages, but alpha uses 8k pages.
>
> It looks that the second line of build directives in the test:
>
> // +build amd64
> // +build linux darwin
>
> overwrites the first one, so the test runs also on non-amd64
> architecture linux OS. I have confirmed this by removing the second
> build directive, and crypto/sha1 test then passed, since
> issue15617_test.go was not linked into the final executable.
>
> Another possible solution is to avoid hard-coding 4k pages in the
> test. The mentioned test will pass on alpha when
>
> const pageSize = 4 << 10
>
> is changed to
>
> const pageSize = 8 << 10

just FTR, I've been seeing the same failure on Solaris/SPARC, which
uses 8k pages.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH] Teach VRP to truncate the case ranges of a switch

2016-08-04 Thread Patrick Palka
On Thu, 4 Aug 2016, Richard Biener wrote:

> On Thu, Aug 4, 2016 at 4:30 AM, Patrick Palka  wrote:
> > On Wed, 3 Aug 2016, David Malcolm wrote:
> >
> >> On Wed, 2016-08-03 at 15:47 +0200, Richard Biener wrote:
> >> > On Wed, Aug 3, 2016 at 6:00 AM, Patrick Palka 
> >> > wrote:
> >> > > VRP currently has functionality to eliminate case labels that lie
> >> > > completely outside of the switch operand's value range.  This patch
> >> > > complements this functionality by teaching VRP to also truncate the
> >> > > case
> >> > > label ranges that partially overlap with the operand's value range.
> >> > >
> >> > > Bootstrapped and regtested on x86_64-pc-linux-gnu.  Does this look
> >> > > like
> >> > > a reasonable optimization?  Admittedly, its effect will almost
> >> > > always be
> >> > > negligible except in cases where a case label range spans a large
> >> > > number
> >> > > of values which is a pretty rare thing.  The optimization triggered
> >> > > about 250 times during bootstrap.
> >> >
> >> > I think it's most useful when the range collapses to a single value.
> >> >
> >> > Ok.
> >>
> >> Is this always an improvement?   I can see that it can simplify things,
> >> eliminate dead code etc, but could it make evaluating the switch less
> >> efficient?
> >>
> >> Consider e.g.
> >>
> >>  void
> >>  test (char ch)
> >>  {
> >>if (ch > 17)
> >>  return;
> >>
> >>switch (ch)
> >>  {
> >>  case 0:
> >>foo (); break;
> >>
> >>  case 1 .. 255:
> >>bar (); break;
> >>  }
> >>  }
> >>
> >> which (assuming this could survive this far in this form) previously
> >> could be implemented as a simple "if (ch == 0)" but with this would get
> >> simplified to:
> >>
> >>  void
> >>  test (char ch)
> >>  {
> >>if (ch > 17)
> >>  return;
> >>
> >>switch (ch)
> >>  {
> >>  case 0:
> >>foo (); break;
> >>
> >>  case 1 .. 17:
> >>bar (); break;
> >>  }
> >>  }
> >>
> >> which presumably introduces a compare against 17 in the implementation of 
> >> the switch; does the new compare get optimized away by jump threading?
> >
> > In this particular example the final code does get worse with the patch
> > for the reason you mentioned:
> >
> > Before:                         After:
> > test:                           test:
> > .LFB0:                          .LFB0:
> >         .cfi_startproc                  .cfi_startproc
> >         cmpb    $17, %dil               cmpb    $17, %dil
> >         ja      .L1                     ja      .L1
> >         xorl    %eax, %eax              subl    $1, %edi
> >         cmpb    $1, %dil                xorl    %eax, %eax
> >         jb      .L7                     cmpb    $16, %dil
> >         jmp     bar                     ja      .L7
> >         .p2align 4,,10                  jmp     bar
> >         .p2align 3                      .p2align 4,,10
> > .L7:                                    .p2align 3
> >         jmp     foo             .L7:
> >         .p2align 4,,10                  jmp     foo
> >         .p2align 3                      .p2align 4,,10
> > .L1:                                    .p2align 3
> >         rep ret                 .L1:
> >         .cfi_endproc                    rep ret
> >                                         .cfi_endproc
> >
> > What's weird is that during gimplification the switch gets simplified to
> >
> >   switch (ch)
> >   {
> > default: foo (); break;
> > case 1 ... 255: bar (); break;
> >   }
> >
> > but if anything I would have expected it to get simplified to
> >
> >   switch (ch)
> >   {
> > case 0: foo (); break;
> > default: bar (); break;
> >   }
> >
> > In general, when case labels are exhaustive, maybe it would be better to
> > designate the case label that has the widest range as the default label?
> > (Currently preprocess_case_label_vec_for_gimple() just designates the
> > very first label to be the default label.)  That would fix this
> > particular regression at least.
> 
> Yes, that looks useful - though I wonder how easy it is to detect for the
> cases where there are more than one case/default.
> 
> Richard.
> 

Here's a patch that does this.  Does it look OK to commit after
bootstrap + regtesting?

-- >8 --

gcc/ChangeLog:

* gimple.c (preprocess_case_label_vec_for_gimple): When the case
labels are exhaustive, designate the label with the widest
range to be the default label.

gcc/testsuite/ChangeLog:

* gcc.dg/switch-11.c: New test.

---
 gcc/gimple.c | 14 +-
 gcc/testsuite/gcc.dg/switch-11.c | 22 ++
 2 files changed, 35 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/switch-11.c

diff --git a/gcc/gimple.c b/gcc/gimple.c
index e275dfc..fc81e52 100644
--- a/gcc/gimple.c
+++ b/gcc/gimple.c
@@ -2946,18 +2946,30 @@ preprocess_case_label_vec_for_gimple (vec labels,
high = CASE_LOW (labels[len - 1]);
  if (tree_int_cst_equal (high, TY

Re: libgo patch committed: Update to 1.7rc3

2016-08-04 Thread Rainer Orth
Hi Uros,

> BTW: I can see this failure on other targets, too, e.g. powerpc64le
> [1] and aarch64 [2].
>
> [1] https://gcc.gnu.org/ml/gcc-testresults/2016-08/msg00321.html
> [2] https://gcc.gnu.org/ml/gcc-testresults/2016-08/msg00318.html

right, both Solaris/x86 and Solaris/SPARC are affected, too (32 and
64-bit in both cases).

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


[PATCH] Create x.gcov file for binary w/o x.gcda file (PR, gcov-profile/65831)

2016-08-04 Thread Martin Liška
Hi.

Following patch is grabbed from the PR, where I just applied the patch
and wrote a test-case which removes x.gcda file before running gcov tool.

Ready to be installed?
Martin
>From 0e8a129302eaf8b5950b7b1a6de560b9c2ee4354 Mon Sep 17 00:00:00 2001
From: marxin 
Date: Thu, 4 Aug 2016 14:04:36 +0200
Subject: [PATCH] Create x.gcov file for binary w/o x.gcda file (PR
 gcov-profile/65831)

gcc/testsuite/ChangeLog:

2016-08-04  Martin Liska  

	* g++.dg/gcov/gcov-16.C: New test.
	* lib/gcov.exp: Support new argument for run-gcov function.

gcc/ChangeLog:

2016-08-04  Martin Liska  
	Adam Fineman  

	* gcov.c (process_file): Create .gcov file when .gcda
	file is missing.
---
 gcc/gcov.c  |  2 +-
 gcc/testsuite/g++.dg/gcov/gcov-16.C | 10 ++
 gcc/testsuite/lib/gcov.exp  | 28 
 3 files changed, 35 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/gcov/gcov-16.C

diff --git a/gcc/gcov.c b/gcc/gcov.c
index 417b4f4..50061c7 100644
--- a/gcc/gcov.c
+++ b/gcc/gcov.c
@@ -725,7 +725,7 @@ process_file (const char *file_name)
 
   fns = fn->next;
   fn->next = NULL;
-  if (fn->counts)
+  if (fn->counts || no_data_file)
 	{
 	  unsigned src = fn->src;
 	  unsigned line = fn->line;
diff --git a/gcc/testsuite/g++.dg/gcov/gcov-16.C b/gcc/testsuite/g++.dg/gcov/gcov-16.C
new file mode 100644
index 000..f09d406
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gcov/gcov-16.C
@@ -0,0 +1,10 @@
+// PR gcov-profile/64634
+// { dg-options "-fprofile-arcs -ftest-coverage" }
+// { dg-do run { target native } }
+
+int main()
+{
+  return 0;   /* count(#) */
+}
+
+// { dg-final { run-gcov remove-gcda gcov-16.C } }
diff --git a/gcc/testsuite/lib/gcov.exp b/gcc/testsuite/lib/gcov.exp
index dfc1301..02bc6b9 100644
--- a/gcc/testsuite/lib/gcov.exp
+++ b/gcc/testsuite/lib/gcov.exp
@@ -20,15 +20,27 @@
 global GCOV
 
 #
+# clean-gcov-file -- delete a working file the compiler creates for gcov
+#
+# TESTCASE is the name of the test.
+# SUFFIX is file suffix
+
+proc clean-gcov-file { testcase suffix } {
+set basename [file tail $testcase]
+set base [file rootname $basename]
+remote_file host delete $base.$suffix
+}
+
+#
 # clean-gcov -- delete the working files the compiler creates for gcov
 #
 # TESTCASE is the name of the test.
 #
 proc clean-gcov { testcase } {
-set basename [file tail $testcase]
-set base [file rootname $basename]
-remote_file host delete $base.gcno $base.gcda \
-	$basename.gcov $base.h.gcov
+clean-gcov-file $testcase "gcno"
+clean-gcov-file $testcase "gcda"
+clean-gcov-file $testcase "gcov"
+clean-gcov-file $testcase "h.gcov"
 }
 
 #
@@ -305,6 +317,7 @@ proc run-gcov { args } {
 set gcov_verify_branches 0
 set gcov_verify_lines 1
 set gcov_verify_intermediate 0
+set gcov_remove_gcda 0
 set xfailed 0
 
 foreach a $args {
@@ -317,6 +330,8 @@ proc run-gcov { args } {
 	  set gcov_verify_calls 0
 	  set gcov_verify_branches 0
 	  set gcov_verify_lines 0
+	} elseif { $a == "remove-gcda" } {
+	  set gcov_remove_gcda 1
 	} elseif { $gcov_args == "" } {
 	set gcov_args $a
 	} else {
@@ -332,6 +347,11 @@ proc run-gcov { args } {
 # Extract the test file name from the arguments.
 set testcase [lindex $gcov_args end]
 
+if { $gcov_remove_gcda } {
+	verbose "Removing $testcase.gcda"
+	clean-gcov-file $testcase "gcda"
+}
+
 verbose "Running $GCOV $testcase" 2
 set testcase [remote_download host $testcase]
 set result [remote_exec host $GCOV $gcov_args]
-- 
2.9.2



Re: [PATCH] Define feature-test macro for std::enable_shared_from_this

2016-08-04 Thread Jonathan Wakely

On 03/08/16 20:11 +0100, Jonathan Wakely wrote:

Another feature we already support, so just define the macro.

* include/bits/shared_ptr_base.h (__cpp_lib_enable_shared_from_this):
Define feature-test macro.
* testsuite/20_util/enable_shared_from_this/members/reinit.cc: Test
for the macro.

Tested x86_64-linux, committed to trunk.


I realised we don't actually implement the whole feature, because we
don't have the new weak_from_this() members (careless of me to forget
the contents of my own proposal!)

This adds them for C++17, or gnu++1*, and only defines the
feature-test macro when those members are present.

Tested powerpc64-linux, committed to trunk.


commit 7c1f28db94c3cb1a28dba4efd0c648bc6c6bb329
Author: Jonathan Wakely 
Date:   Thu Aug 4 13:04:14 2016 +0100

Define std::enable_shared_from_this::weak_from_this

	* testsuite/20_util/enable_shared_from_this/members/reinit.cc: Use
	effective target not dg-options. Move check for feature-test macro to:
	* testsuite/20_util/enable_shared_from_this/members/weak_from_this.cc:
	New test.

diff --git a/libstdc++-v3/include/bits/shared_ptr.h b/libstdc++-v3/include/bits/shared_ptr.h
index 483c2bc..747b09a 100644
--- a/libstdc++-v3/include/bits/shared_ptr.h
+++ b/libstdc++-v3/include/bits/shared_ptr.h
@@ -586,6 +586,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   shared_from_this() const
   { return shared_ptr(this->_M_weak_this); }
 
+#if __cplusplus > 201402L || !defined(__STRICT_ANSI__) // c++1z or gnu++11
+#define __cpp_lib_enable_shared_from_this 201603
+  weak_ptr<_Tp>
+  weak_from_this()
+  { return this->_M_weak_this; }
+
+  weak_ptr<const _Tp>
+  weak_from_this() const
+  { return this->_M_weak_this; }
+#endif
+
 private:
   template
 	void
diff --git a/libstdc++-v3/include/bits/shared_ptr_base.h b/libstdc++-v3/include/bits/shared_ptr_base.h
index 2698ba4..787dc9b 100644
--- a/libstdc++-v3/include/bits/shared_ptr_base.h
+++ b/libstdc++-v3/include/bits/shared_ptr_base.h
@@ -1472,7 +1472,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   void
   _M_assign(_Tp* __ptr, const __shared_count<_Lp>& __refcount) noexcept
   {
-#define __cpp_lib_enable_shared_from_this 201603
 	if (use_count() == 0)
 	  {
 	_M_ptr = __ptr;
@@ -1557,6 +1556,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   shared_from_this() const
   { return __shared_ptr(this->_M_weak_this); }
 
+#if __cplusplus > 201402L || !defined(__STRICT_ANSI__) // c++1z or gnu++11
+  __weak_ptr<_Tp, _Lp>
+  weak_from_this()
+  { return this->_M_weak_this; }
+
+  __weak_ptr<const _Tp, _Lp>
+  weak_from_this() const
+  { return this->_M_weak_this; }
+#endif
+
 private:
   template
 	void
diff --git a/libstdc++-v3/testsuite/20_util/enable_shared_from_this/members/reinit.cc b/libstdc++-v3/testsuite/20_util/enable_shared_from_this/members/reinit.cc
index 1cf9148..3209f87 100644
--- a/libstdc++-v3/testsuite/20_util/enable_shared_from_this/members/reinit.cc
+++ b/libstdc++-v3/testsuite/20_util/enable_shared_from_this/members/reinit.cc
@@ -15,15 +15,11 @@
 // with this library; see the file COPYING3.  If not see
 // <http://www.gnu.org/licenses/>.
 
-// { dg-options "-std=gnu++11" }
+// { dg-do run { target c++11 } }
 
 #include 
 #include 
 
-#if __cpp_lib_enable_shared_from_this < 201603
-# error "__cpp_lib_enable_shared_from_this < 201603"
-#endif
-
 struct X : public std::enable_shared_from_this<X> { };
 
 bool


Re: Implement -Wimplicit-fallthrough (take 2): the rest

2016-08-04 Thread Michael Matz
Hi,

On Wed, 27 Jul 2016, Marek Polacek wrote:

> And this is the rest.  Either I just adjusted a falls through comment, 
> or I added __builtin_fallthrough ().  These were the cases where I was 
> fairly sure that the fall through is intentional.

I saw one case where I think the warning is a bit over-active:

@@ -42072,6 +42089,7 @@ rdseed_step:
 case IX86_BUILTIN_ADDCARRYX64:
   icode = CODE_FOR_addcarrydi;
   mode0 = DImode;
+  gcc_fallthrough ();
 
 handlecarry:
   arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in.  */

I.e. it also warns if the following label is not a case label but a normal 
one.  I don't think this counts as a classical fall-through and it IMHO 
should not be warned about nor should it be marked.


Ciao,
Michael.


[PATCH] Fix latent bugs

2016-08-04 Thread Richard Biener

The following fixes two latent bugs I ran into when fixing PR72772.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2016-08-04  Richard Biener  

* tree-cfgcleanup.c (tree_forwarder_block_p): Use bb_loop_header_p.
* cfghooks.c (force_nonfallthru): If we ended up splitting a latch
adjust loop info accordingly.

Index: gcc/tree-cfgcleanup.c
===
--- gcc/tree-cfgcleanup.c   (revision 239120)
+++ gcc/tree-cfgcleanup.c   (working copy)
@@ -344,7 +344,7 @@ tree_forwarder_block_p (basic_block bb,
 {
   basic_block dest;
   /* Protect loop headers.  */
-  if (bb->loop_father->header == bb)
+  if (bb_loop_header_p (bb))
return false;
 
   dest = EDGE_SUCC (bb, 0)->dest;
Index: gcc/cfghooks.c
===
--- gcc/cfghooks.c  (revision 239120)
+++ gcc/cfghooks.c  (working copy)
@@ -1030,11 +1030,17 @@ force_nonfallthru (edge e)
 
   if (current_loops != NULL)
{
+ basic_block pred = single_pred (ret);
+ basic_block succ = single_succ (ret);
  struct loop *loop
-   = find_common_loop (single_pred (ret)->loop_father,
-   single_succ (ret)->loop_father);
+   = find_common_loop (pred->loop_father, succ->loop_father);
  rescan_loop_exit (e, false, true);
  add_bb_to_loop (ret, loop);
+
+ /* If we split the latch edge of loop adjust the latch block.  */
+ if (loop->latch == pred
+ && loop->header == succ)
+   loop->latch = ret;
}
 }
 


Re: [PATCH GCC]Simplify interface for simplify_using_initial_conditions

2016-08-04 Thread Richard Biener
On Thu, Aug 4, 2016 at 10:40 AM, Bin.Cheng  wrote:
> On Wed, Aug 3, 2016 at 11:17 PM, Jeff Law  wrote:
>> On 08/03/2016 10:35 AM, Bin Cheng wrote:
>>>
>>> Hi,
>>> When I introduced parameter STOP for expand_simple_operations, I also
>>> added it for simplify_using_initial_conditions.  The STOP argument is also
>>> passed to simplify_using_initial_conditions in
>>> simple_iv_with_niters/loop_exits_before_overflow.  After analyzing case
>>> reported by PR72772, I think STOP expanding is only needed for
>>> expand_simple_operations when handling IV.step in tree-ssa-loop-ivopts.c.
>>> For other cases like calls to simplify_using_initial_condition, both cond
>>> and expr should be expanded to check tree expression equality.  This patch
>>> does so.  It simplifies interface by removing parameter STOP, also moves
>>> expand_simple_operations from tree_simplify_using_condition_1 to its caller.
>>>
>>> Bootstrap and test on x86_64 and AArch64.  Is it OK?
>>>
>>> Thanks,
>>> bin
>>>
>>> 2016-08-02  Bin Cheng  
>>>
>>> PR tree-optimization/72772
>>> * tree-ssa-loop-niter.h (simplify_using_initial_conditions):
>>> Delete
>>> parameter STOP.
>>> * tree-ssa-loop-niter.c (tree_simplify_using_condition_1): Delete
>>> parameter STOP and update calls.  Move expand_simple_operations
>>> function call from here...
>>> (simplify_using_initial_conditions): ...to here.  Delete parameter
>>> STOP.
>>> (tree_simplify_using_condition): Delete parameter STOP.
>>> * tree-scalar-evolution.c (simple_iv_with_niters): Update call to
>>> simplify_using_initial_conditions.
>>>
>> OK.
>> jeff
>
> Thanks for reviewing.  Now I have a question about behavior of the
> interface.  Although by expanding both cond and expr, this patch
> catches more equality cases, it always returns expanded expr even if it's
> not simplified, while the original behavior only returns simplified
> expr (not expanded).  For most use cases, it doesn't matter because we
> only care if the simplified result is TRUE or FALSE, but in
> computation of niter->assumption and niter->may_be_zero, we may end up
> with different (expanded) expressions.  Not sure how much this
> difference matters.  I can work on another version of the patch keeping the
> old behavior if it is worth keeping.

It might result in additional redundant code to be generated when generating
versioning conditions from assumption or maybe_zero?  So yes, I think
the old behavior is worth preserving.

Richard.

> Thanks,
> bin


Re: [LTO] Add wide_int streaming support

2016-08-04 Thread Richard Biener
On Thu, Aug 4, 2016 at 11:32 AM, Jan Hubicka  wrote:
>> Hi Richard,
>>
>> Thanks for the review.
>>
>> On 04/08/16 17:26, Richard Biener wrote:
>> >On Thu, Aug 4, 2016 at 6:12 AM, kugan  
>> >wrote:
>> >>Hi,
>> >>
>> >>During IPA-VRP implementation, I realized that we don't support streaming
>> >>wide_int in LTO. Attached patch does this. Tested with IPA-VRP. Is this OK
>> >>for trunk if bootstrap and regression testing is fine.
>> >
>> >Hmm, those functions belong to data-streamer-{in,out}.c and data-streamer.h
>> >and should be named streamer_write_wide_int / streamer_read_wide_int.
>> >
>> >Note that we already have (non-exported) streamer_write_wi / 
>> >streamer_read_wi
>> >which operate on widest_ints.  Those also reside in lto-streamer-{in,out}.c 
>> >and
>> >should be moved to data-streamer.h (and be renamed to
>> >streamer_write_widest_int).
>>
>> I have now streamer_write_wide_int and streamer_write_widest_int.
>> Similarly for reading. There is a lot of similarity. I am not very
>> familiar with wide_int so kept it that way. Is this OK now?
>>
>> Thanks,
>> Kugan
>>
>> gcc/ChangeLog:
>>
>> 2016-08-04  Kugan Vivekanandarajah  
>>
>>   * data-streamer-in.c (streamer_read_wide_int): New.
>>   (streamer_read_widest_int): Renamed function.
>>   * data-streamer-out.c (streamer_write_wide_int): New
>>   (streamer_write_widest_int): Renamed function.
>
> I wondered, given we do C++ now, if we don't want to just have
> stream_in/stream_out member functions for our classes and/or use just one
> function name for all of them so one does not need to look up somewhat
> irregular function names.
>
> I find LTO streaming API very hard to memorize and use without constantly
> looking up existing code.

Not sure if that would help given the arguments are different even besides the
thing you want to stream.

Richard.

> Honza


Re: [PATCH] Create x.gcov file for binary w/o x.gcda file (PR, gcov-profile/65831)

2016-08-04 Thread Nathan Sidwell

On 08/04/16 08:27, Martin Liška wrote:

Hi.

Following patch is grabbed from the PR, where I just applied the patch
and wrote a test-case which removes x.gcda file before running gcov tool.

Ready to be installed?


2016-08-04  Martin Liska  

* g++.dg/gcov/gcov-16.C: New test.
* lib/gcov.exp: Support new argument for run-gcov function.

gcc/ChangeLog:

2016-08-04  Martin Liska  
Adam Fineman  

* gcov.c (process_file): Create .gcov file when .gcda
file is missing.

ok thanks


Re: protected alloca class for malloc fallback

2016-08-04 Thread Richard Biener
On Thu, Aug 4, 2016 at 1:30 PM, Aldy Hernandez  wrote:
> Howdy!
>
> As part of my -Walloca-larger-than=life work, I've been running said pass
> over gcc, binutils, and gdb, and trying to fix things along the way.
>
> Particularly irritating and error prone is having to free malloc'd pointers
> on every function exit point.  We end up with a lot of:
>
> foo(size_t len)
> {
>   void *p, *m_p = NULL;
>   if (len < HUGE)
> p = alloca(len);
>   else
> p = m_p = malloc(len);
>   if (something)
> goto out;
>   stuff();
> out:
>   free (m_p);
> }
>
> ...which nobody really likes.
>
> I've been thinking that for GCC we could have a protected_alloca class whose
> destructor frees any malloc'd memory:
>
> void foo()
> {
>   char *p;
>   protected_alloca chunk(5);
>   p = (char *) chunk.pointer();
>   f(p);
> }
>
> This would generate:
>
> void foo() ()
> {
>   void * _3;
>
>   :
>   _3 = malloc (5);
>   f (_3);
>
>   :
>   free (_3); [tail call]
>   return;
> }
>
> Now the problem with this is that the memory allocated by chunk is freed
> when it goes out of scope, which may not be what you want.  For example:
>
>  func()
>  {
>char *str;
>{
>  protected_alloca chunk ();
>  // malloc'd pointer will be freed when chunk goes out of scope.
>  str = (char *) chunk.pointer ();
>}
>use (str);  // BAD!  Use after free.
>  }

But how's that an issue if the chunk is created at the exact place where there
previously was an alloca?

Your class also will not work when internal_alloc is not inlined and
the alloca path
is taken like when using non-GCC host compilers.

> In the attached patch implementing this class I have provided another idiom
> for avoiding this problem:
>
>  func()
>  {
>char *str;
>protected_alloca chunk;
>{
>  chunk.alloc (999);
>  str = (char *) chunk.pointer ();
>}
>// OK, pointer will be freed on function exit.
>use (str);
>  }
>
> So I guess it's between annoying gotos and keeping track of multiple exit
> points to a function previously calling alloca, or making sure the
> protected_alloca object always resides in the scope where the memory is
> going to be used.
>
> Is there a better blessed C++ way?  If not, is this OK?

It looks like you want to replace _all_ alloca uses?  What's the point
in doing this
at all?  Just to be able to enable the warning during bootstrap?

Having the conditional malloc/alloca will also inhibit optimization like eliding
the malloc or alloca calls completely.

Thanks,
Richard.

> Included is the conversion of tree.c.  More to follow once we agree on a
> solution.
>
> Tested on x86-64 Linux.
>
> Aldy


Re: [RFC] ipa bitwise constant propagation

2016-08-04 Thread Jan Hubicka
> I didn't look at the propagation part but eventually the IPA-CP
> lattice gets quite big.  Also the alignment lattice is very
> similar to the bits lattice so why not merge those two?  But

This was always the original idea to replace alignment propagation by bitwise
ccp.  I suppose we only have issue here because nonzero bits are not tracked for
pointers so we need to feed the original lattices by hand?

We could also make use of VR ranges and bits while evaluating predicates
in ipa-inline-analysis. I can look into it after returning from Leeds.

Honza
> in the end it's Martins/Honzas call here.  Note there is
> trailing_wide_ints <> which could be used to improve memory usage
> based on the underlying type.
> 
> Thanks,
> Richard.


Re: [PATCH] gcov tool: Implement Hawick's algorithm for cycle detection, (PR gcov-profile/67992)

2016-08-04 Thread Nathan Sidwell

On 08/04/16 06:41, Martin Liška wrote:

On 08/03/2016 04:22 PM, Nathan Sidwell wrote:

Martin,

As I've been going through all PRs related to gcov-profile, I've noticed this PR.
Current implementation of cycle detection in gcov is very poor, leading to 
extreme run time
for cases like the one mentioned in the PR (which does not contain a cycle). Thanks to 
Joshua, I've
grabbed his patch and removed the scaffolding (classes: Arc, Block, ...) he 
did. After doing that
the patch is quite subtle and fast (of course).


sorry to be a pain, but could you split the patch into
a) formatting changes
b) the clever  bits

the formatting changes can then (probably) be applied as obvious.

nathan


This is second part which is the change of loop detection algorithm.


typedefs for arc and block pointer vectors would be useful to add.  They're used 
in a lot of  places:


typedef vector<arc_t *> arc_vector_t;
typedef vector<block_t *> block_vector_t;

(question, should those be  'const T *' template parms?)

No need for vector of block vectors typedef, unless you think otherwise.

+/* Flag that drives cycle detection after a negative cycle is seen.  */
+static bool did_negate = false;

That's ugly, and I think unnecessary.  Use +1 for loop, -1 for negated loop, 0 
for no loop  (or a tri-valued enum with the right properties)


1) have handle_cycle return +1 (not negated) or -1 (negated) appropriately.

2) have circuit return an int similarly. Then
  if (w == start)
found |= handle_cycle (path, count);
  else if (...)
found |= circuit (...)
will DTRT there

3) finally have find_cycles merge the results from its circuit calls and 
determine whether to repeat itself -- rather than have the caller do it. (or 
have another reference parm to tell the caller?)


nathan



split test cases pr71078-1.c and pr71078-2.c

2016-08-04 Thread Prathamesh Kulkarni
Hi,
The attached patch splits each test-case into three, one for float,
double and long-double.
I verified that the long double tests are unsupported now for arm target.
OK to commit ?

Thanks,
Prathamesh
2016-08-04  Prathamesh Kulkarni  

testsuite/
* gcc.dg/tree-ssa/pr71078-1.c: Remove double and long double
test-cases.
* gcc.dg/tree-ssa/pr71078-1-double.c: New test-case.
* gcc.dg/tree-ssa/pr71078-1-long-double.c: Likewise.
* gcc.dg/tree-ssa/pr71078-2.c: Remove double and long double
test-cases.
* gcc.dg/tree-ssa/pr71078-2-double.c: New test-case.
* gcc.dg/tree-ssa/pr71078-2-long-double.c: Likewise.
* gcc.dg/tree-ssa/pr71078-3.c: Add dg-require-effective-target for
large_double.

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr71078-1-double.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr71078-1-double.c
new file mode 100644
index 000..3ef9efd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr71078-1-double.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target large_double } */
+/* { dg-options "-O2 -ffast-math -fdump-tree-forwprop-details" } */
+
+#include 
+
+double f2(double x)
+{
+  double t1 = fabs (x);
+  double t2 = x / t1;
+  return t2;
+}
+
+/* { dg-final { scan-tree-dump "__builtin_copysign" "forwprop1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr71078-1-long-double.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr71078-1-long-double.c
new file mode 100644
index 000..a528246
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr71078-1-long-double.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target large_long_double } */
+/* { dg-options "-O2 -ffast-math -fdump-tree-forwprop-details" } */
+
+#include 
+
+long double f3 (long double x)
+{
+  long double t1 = fabsl (x);
+  long double t2 = x / t1;
+  return t2;
+}
+
+/* { dg-final { scan-tree-dump "__builtin_copysignl" "forwprop1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr71078-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr71078-1.c
index 6204c14..10e3c35 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr71078-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr71078-1.c
@@ -10,20 +10,4 @@ float f1(float x)
   return t2;
 }
  
-double f2(double x)
-{
-  double t1 = fabs (x);
-  double t2 = x / t1;
-  return t2;
-}
-
-long double f3 (long double x)
-{
-  long double t1 = fabsl (x);
-  long double t2 = x / t1;
-  return t2;
-}
-
 /* { dg-final { scan-tree-dump "__builtin_copysignf" "forwprop1" } } */
-/* { dg-final { scan-tree-dump "__builtin_copysign" "forwprop1" } } */
-/* { dg-final { scan-tree-dump "__builtin_copysignl" "forwprop1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr71078-2-double.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr71078-2-double.c
new file mode 100644
index 000..4ef1e4d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr71078-2-double.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target large_double } */
+/* { dg-options "-O2 -ffast-math -fdump-tree-forwprop-details" } */
+
+#include 
+
+double f2(double x)
+{
+  double t1 = fabs (x);
+  double t2 = t1 / x; 
+  return t2;
+}
+
+/* { dg-final { scan-tree-dump "__builtin_copysign" "forwprop1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr71078-2-long-double.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr71078-2-long-double.c
new file mode 100644
index 000..2eaf02d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr71078-2-long-double.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target large_long_double } */
+/* { dg-options "-O2 -ffast-math -fdump-tree-forwprop-details" } */
+
+#include 
+
+long double f3 (long double x)
+{
+  long double t1 = fabsl (x);
+  long double t2 = t1 / x; 
+  return t2;
+}
+
+/* { dg-final { scan-tree-dump "__builtin_copysignl" "forwprop1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr71078-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr71078-2.c
index 96485af..7ce61a1 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr71078-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr71078-2.c
@@ -10,20 +10,4 @@ float f1(float x)
   return t2;
 }
  
-double f2(double x)
-{
-  double t1 = fabs (x);
-  double t2 = t1 / x; 
-  return t2;
-}
-
-long double f3 (long double x)
-{
-  long double t1 = fabsl (x);
-  long double t2 = t1 / x; 
-  return t2;
-}
-
 /* { dg-final { scan-tree-dump "__builtin_copysignf" "forwprop1" } } */
-/* { dg-final { scan-tree-dump "__builtin_copysign" "forwprop1" } } */
-/* { dg-final { scan-tree-dump "__builtin_copysignl" "forwprop1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr71078-3.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr71078-3.c
index 8780b6a..8871a69 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr71078-3.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr71078-3.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target large_double } */
 /* { dg-options "-O2 -ffast-math -fdump-tree-forwprop-details" } */
 
 #include 


Re: [PATCH] gcov tool: Implement Hawick's algorithm for cycle detection, (PR gcov-profile/67992)

2016-08-04 Thread Jan Hubicka
> On 08/04/16 06:41, Martin Liška wrote:
> >On 08/03/2016 04:22 PM, Nathan Sidwell wrote:
> >>Martin,
> >>>As I'm going through all PRs related to gcov-profile, I've noticed this 
> >>>PR.
> >>>The current implementation of cycle detection in gcov is very poor, leading to 
> >>>extreme run time
> >>>for cases like the one mentioned in the PR (which does not contain a cycle). Thanks 
> >>>to Joshua, I've
> >>>grabbed his patch and removed the scaffolding (classes: Arc, Block, ...) 
> >>>he added. After doing that
> >>>the patch is quite subtle and fast (of course).
> >>
> >>sorry to be a pain, but could you split the patch into
> >>a) formatting changes
> >>b) the clever  bits
> >>
> >>the formatting changes can then (probably) be applied as obvious.
> >>
> >>nathan
> >
> >This is the second part, which changes the loop detection algorithm.
> 
> typedefs for arc and block pointer vectors would be useful to add.
> They're used in a lot of  places:
> 
> typedef vector arc_vector_t;
> typedef vector block_vector_t;
> 
> (question, should those be  'const T *' template parms?)

What about trying to get naming scheme consistent with rest of GCC which call
those bbs and edges?  I know it is hard wired into -fprofile-arcs name but it
may be nice to get types more consistent.

Honza
> 
> No need for vector of block vectors typedef, unless you think otherwise.
> 
> +/* Flag that drives cycle detection after a negative cycle is seen.  */
> +static bool did_negate = false;
> 
> That's ugly, and I think unnecessary.  Use +1 for loop, -1 for
> negated loop, 0 for no loop  (or a tri-valued enum with the right
> properties)
> 
> 1) have handle_cycle return +1 (not negated) or -1 (negated) appropriately.
> 
> 2) have circuit return an int similarly. Then
>   if (w == start)
> found |= handle_cycle (path, count);
>   else if (...)
> found |= circuit (...)
> will DTRT there
> 
> 3) finally have find_cycles merge the results from its circuit calls
> and determine whether to repeat itself -- rather than have the
> caller do it. (or have another reference parm to tell the caller?)
> 
> nathan


[PATCH] Update C++17 library implementation status table

2016-08-04 Thread Jonathan Wakely

This adds all the features from the latest draft of SD-6, and
reformats the table to be closer to the tables at
https://gcc.gnu.org/projects/cxx-status.html (including adding the
feature-test macros).

* doc/xml/manual/status_cxx2017.xml: Update C++17 status table.
* doc/html/manual/status.html: Regenerate.

Committed to trunk.


commit 9d5280b3ecfbd679d00f98bd48c57a3151b1fcfd
Author: Jonathan Wakely 
Date:   Thu Aug 4 14:31:21 2016 +0100

Update C++17 library implementation status table

* doc/xml/manual/status_cxx2017.xml: Update C++17 status table.
* doc/html/manual/status.html: Regenerate.

diff --git a/libstdc++-v3/doc/xml/manual/status_cxx2017.xml 
b/libstdc++-v3/doc/xml/manual/status_cxx2017.xml
index 60e5fae..d32399d 100644
--- a/libstdc++-v3/doc/xml/manual/status_cxx2017.xml
+++ b/libstdc++-v3/doc/xml/manual/status_cxx2017.xml
@@ -20,10 +20,21 @@ presence of the required flag.
 
 
 
-This page describes the C++1z and library TS support in mainline GCC SVN,
+This section describes the C++1z and library TS support in mainline GCC SVN,
 not in any particular release.
 
 
+
+The following table lists new library features that have been accepted into
+the C++1z working draft. The "Proposal" column provides a link to the
+ISO C++ committee proposal that describes the feature, while the "Status"
+column indicates the first version of GCC that contains an implementation of
+this feature (if it has been implemented).
+The "SD-6 Feature Test" column shows the corresponding macro or header from
+http://www.w3.org/1999/xlink"; 
xlink:href="https://isocpp.org/std/standing-documents/sd-6-sg10-feature-test-recommendations";>SD-6:
+Feature-testing recommendations for C++.
+
+
 
 C++ 201z Implementation Status
 
@@ -34,199 +45,661 @@ not in any particular release.
 
   
 
-  Paper
-  Title
+  Library Feature
+  Proposal
   Status
-  Comments
+  SD-6 Feature Test
 
   
 
   
 
 
+  
+  
+   constexpr 
std::hardware_{constructive,destructive}_interference_size
+  
+  
+   http://www.w3.org/1999/xlink"; 
xlink:href="http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0154r1.html";>
+   P0154R1
+   
+  
+   No 
+   __cpp_lib_hardware_interference_size >= 201603 

+
+
+
+  
+   Core Issue 1776: Replacement of class objects containing 
reference members
+  
+   http://www.w3.org/1999/xlink"; 
xlink:href="http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0137r1.html";>
+   P0137R1
+   
+  
+   No 
+   __cpp_lib_launder >= 201606 
+
+
+
+  Wording for std::uncaught_exceptions
   
http://www.w3.org/1999/xlink"; 
xlink:href="http://www.open-std.org/JTC1/sc22/WG21/docs/papers/2014/n4259.pdf";>
  N4259

   
-  Wording for std::uncaught_exceptions
-  Y
-  
+  6.1
+  __cpp_lib_uncaught_exceptions >= 201411
+
+
+
+  
+   Variant: a type-safe union for C++17 
+  
+   http://www.w3.org/1999/xlink"; 
xlink:href="http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0088r3.html";>
+   P0088R3
+   
+  
+   No 
+   __has_include() 
+
+
+
+   Library Fundamentals V1 TS Components: optional 

+  
+   http://www.w3.org/1999/xlink"; 
xlink:href="http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0220r1.html";>
+   P0220R1
+   
+  
+   7 
+   __has_include() 
+
+
+
+   Library Fundamentals V1 TS Components: any 
+  
+   http://www.w3.org/1999/xlink"; 
xlink:href="http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0220r1.html";>
+   P0220R1
+   
+  
+   7 
+   __has_include() 
+
+
+
+   Library Fundamentals V1 TS Components: string_view 

+  
+   http://www.w3.org/1999/xlink"; 
xlink:href="http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0220r1.html";>
+   P0220R1
+   
+  
+   7 
+   __has_include() 
+
+
+
+  
+   Library Fundamentals V1 TS Components: 
memory_resource 
+  
+   http://www.w3.org/1999/xlink"; 
xlink:href="http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0220r1.html";>
+   P0220R1
+   
+  
+   No 
+   __has_include() 
+
+
+
+   Constant View: A proposal for a std::as_const 
helper function template  
+  
+   http://www.w3.org/1999/xlink"; xlink:href="">
+   P0007R1
+   
+  
+   7 
+   __cpp_lib_as_const >= 201510 
+
+
+
+   Improving pair and tuple 
+  
+   http://www.w3.org/1999/xlink"; 
xlink:href="http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/n4387";>
+   N4387
+   
+  
+   6.1 
+   N/A 
+
+
+
+  
+   make_from_tuple: apply for construction 
+  
+   http://www.w3.org/1999/xlink";

Re: split test cases pr71078-1.c and pr71078-2.c

2016-08-04 Thread Richard Biener
On Thu, 4 Aug 2016, Prathamesh Kulkarni wrote:

> Hi,
> The attached patch splits each test-case into three, one for float,
> double and long-double.
> I verified that the long double tests are unsupported now for arm target.
> OK to commit ?

Ok.

Richard.

> Thanks,
> Prathamesh
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


Re: Go patch committed: add escape analysis debugging

2016-08-04 Thread Rainer Orth
Hi Ian,

> This patch by Chris Manghane adds debugging to the escape analysis
> code.  This debugging is designed to generate the same sort of output
> as the gc Go compiler, for easier comparison of results.  Escape
> analysis is still not enabled by default.  Bootstrapped and ran Go
> testsuite on x86_64-pc-linux-gnu.  Committed to mainline.

this patch (resp. this particular line)

> Index: gcc/go/gofrontend/escape.cc
> ===
> --- gcc/go/gofrontend/escape.cc   (revision 238653)
> +++ gcc/go/gofrontend/escape.cc   (working copy)
> @@ -6,12 +6,14 @@
>  
>  #include 
>  #include 
> +#include 

broke Solaris bootstrap:

 from /vol/gcc/src/hg/trunk/local/gcc/go/go-system.h:23,
 from 
/vol/gcc/src/hg/trunk/local/gcc/go/gofrontend/go-linemap.h:10,
 from /vol/gcc/src/hg/trunk/local/gcc/go/gofrontend/gogo.h:10,
 from 
/vol/gcc/src/hg/trunk/local/gcc/go/gofrontend/escape.cc:11:
./auto-host.h:2214:0: error: "_FILE_OFFSET_BITS" redefined [-Werror]
 #define _FILE_OFFSET_BITS 64

In file included from 
/var/gcc/regression/trunk/12-gcc-gas/build/prev-gcc/include-fixed/wchar.h:17:0,
 from 
/var/gcc/regression/trunk/12-gcc-gas/build/prev-i386-pc-solaris2.12/libstdc++-v3/include/cwchar:44,
 from 
/var/gcc/regression/trunk/12-gcc-gas/build/prev-i386-pc-solaris2.12/libstdc++-v3/include/bits/postypes.h:40,
 from 
/var/gcc/regression/trunk/12-gcc-gas/build/prev-i386-pc-solaris2.12/libstdc++-v3/include/iosfwd:40,
 from 
/var/gcc/regression/trunk/12-gcc-gas/build/prev-i386-pc-solaris2.12/libstdc++-v3/include/ios:38,
 from 
/var/gcc/regression/trunk/12-gcc-gas/build/prev-i386-pc-solaris2.12/libstdc++-v3/include/istream:38,
 from 
/var/gcc/regression/trunk/12-gcc-gas/build/prev-i386-pc-solaris2.12/libstdc++-v3/include/sstream:38,
 from /vol/gcc/src/hg/trunk/local/gcc/go/gofrontend/escape.cc:9:
/var/gcc/regression/trunk/12-gcc-gas/build/prev-gcc/include-fixed/sys/feature_tests.h:223:0:
 note: this is the location of the previous definition
 #define _FILE_OFFSET_BITS 32

Including anything before "config.h" (or in the case of Go "go-system.h")
is fragile at best.

The following patch allowed me to compile escape.cc again.

Rainer


2016-08-04  Rainer Orth  

* gofrontend/escape.cc: Include "go-system.h" first.

diff --git a/gcc/go/gofrontend/escape.cc b/gcc/go/gofrontend/escape.cc
--- a/gcc/go/gofrontend/escape.cc
+++ b/gcc/go/gofrontend/escape.cc
@@ -4,6 +4,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
+#include "go-system.h"
+
 #include 
 #include 
 #include 

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH] Teach VRP to truncate the case ranges of a switch

2016-08-04 Thread Richard Biener
On Thu, Aug 4, 2016 at 2:14 PM, Patrick Palka  wrote:
> On Thu, 4 Aug 2016, Richard Biener wrote:
>
>> On Thu, Aug 4, 2016 at 4:30 AM, Patrick Palka  wrote:
>> > On Wed, 3 Aug 2016, David Malcolm wrote:
>> >
>> >> On Wed, 2016-08-03 at 15:47 +0200, Richard Biener wrote:
>> >> > On Wed, Aug 3, 2016 at 6:00 AM, Patrick Palka 
>> >> > wrote:
>> >> > > VRP currently has functionality to eliminate case labels that lie
>> >> > > completely outside of the switch operand's value range.  This patch
>> >> > > complements this functionality by teaching VRP to also truncate the
>> >> > > case
>> >> > > label ranges that partially overlap with the operand's value range.
>> >> > >
>> >> > > Bootstrapped and regtested on x86_64-pc-linux-gnu.  Does this look
>> >> > > like
>> >> > > a reasonable optimization?  Admittedly, its effect will almost
>> >> > > always be
>> >> > > negligible except in cases where a case label range spans a large
>> >> > > number
>> >> > > of values which is a pretty rare thing.  The optimization triggered
>> >> > > about 250 times during bootstrap.
>> >> >
>> >> > I think it's most useful when the range collapses to a single value.
>> >> >
>> >> > Ok.
>> >>
>> >> Is this always an improvement?   I can see that it can simplify things,
>> >> eliminate dead code etc, but could it make evaluating the switch less
>> >> efficient?
>> >>
>> >> Consider e.g.
>> >>
>> >>  void
>> >>  test (char ch)
>> >>  {
>> >>if (ch > 17)
>> >>  return;
>> >>
>> >>switch (ch)
>> >>  {
>> >>  case 0:
>> >>foo (); break;
>> >>
>> >>  case 1 .. 255:
>> >>bar (); break;
>> >>  }
>> >>  }
>> >>
>> >> which (assuming this could survive this far in this form) previously
>> >> could be implemented as a simple "if (ch == 0)" but with this would get
>> >> simplified to:
>> >>
>> >>  void
>> >>  test (char ch)
>> >>  {
>> >>if (ch > 17)
>> >>  return;
>> >>
>> >>switch (ch)
>> >>  {
>> >>  case 0:
>> >>foo (); break;
>> >>
>> >>  case 1 .. 17:
>> >>bar (); break;
>> >>  }
>> >>  }
>> >>
>> >> which presumably introduces a compare against 17 in the implementation of 
>> >> the switch; does the new compare get optimized away by jump threading?
>> >
>> > In this particular example the final code does get worse with the patch
>> > for the reason you mentioned:
>> >
>> > Before:After:
>> > test:  test:
>> > .LFB0: .LFB0:
>> > .cfi_startproc .cfi_startproc
>> > cmpb$17, %dil  cmpb$17, %dil
>> > ja  .L1ja  .L1
>> > xorl%eax, %eax subl$1, %edi
>> > cmpb$1, %dil   xorl%eax, %eax
>> > jb  .L7cmpb$16, %dil
>> > jmp barja  .L7
>> > .p2align 4,,10 jmp bar
>> > .p2align 3 .p2align 4,,10
>> > .L7:   .p2align 3
>> > jmp foo.L7:
>> > .p2align 4,,10 jmp foo
>> > .p2align 3 .p2align 4,,10
>> > .L1:   .p2align 3
>> > rep ret.L1:
>> > .cfi_endproc   rep ret
>> >.cfi_endproc
>> >
>> > What's weird is that during gimplification the switch gets simplified to
>> >
>> >   switch (ch)
>> >   {
>> > default: foo (); break;
>> > case 1 ... 255: bar (); break;
>> >   }
>> >
>> > but if anything I would have expected it to get simplified to
>> >
>> >   switch (ch)
>> >   {
>> > case 0: foo (); break;
>> > default: bar (); break;
>> >   }
>> >
>> > In general, when case labels are exhaustive, maybe it would be better to
>> > designate the case label that has the widest range as the default label?
>> > (Currently preprocess_case_label_vec_for_gimple() just designates the
>> > very first label to be the default label.)  That would fix this
>> > particular regression at least.
>>
>> Yes, that looks useful - though I wonder how easy it is to detect for the
>> cases where there are more than one case/default.
>>
>> Richard.
>>
>
> Here's a patch that does this.  Does it look OK to commit after
> bootstrap + regtesting?

Ok.

Thanks,
Richard.

> -- >8 --
>
> gcc/ChangeLog:
>
> * gimple.c (preprocess_case_label_vec_for_gimple): When the case
> labels are exhaustive, designate the label with the widest
> range to be the default label.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/switch-11.c: New test.
>
> ---
>  gcc/gimple.c | 14 +-
>  gcc/testsuite/gcc.dg/switch-11.c | 22 ++
>  2 files changed, 35 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.dg/switch-11.c
>
> diff --git a/gcc/gimple.c b/gcc/gimpl

Re: C++ OpenACC routine directive testing: templated, and "auto", trailing return type syntax

2016-08-04 Thread Thomas Schwinge
Hi!

On Tue, 12 Jul 2016 15:55:37 +0200, Jakub Jelinek  wrote:
> On Tue, Jul 12, 2016 at 03:50:14PM +0200, Thomas Schwinge wrote:
> > Templated, and "auto", trailing return type syntax with the C++ OpenACC
> > routine directive all works, but doesn't have test coverage.  OK for
> > trunk?

> > C++ OpenACC routine directive testing: templated, and "auto", trailing 
> > return type syntax

> Ok.

Committed to trunk in r239126:

commit ae6f822e37f1ab662ae467abcb5719c4cbf2d230
Author: tschwinge 
Date:   Thu Aug 4 13:35:10 2016 +

C++ OpenACC routine directive testing: templated, and "auto", trailing 
return type syntax

libgomp/
* testsuite/libgomp.oacc-c++/routine-1-auto.C: New file.
* testsuite/libgomp.oacc-c++/routine-1-template-auto.C: Likewise.
* testsuite/libgomp.oacc-c++/routine-1-template-trailing-return-type.C:
Likewise.
* testsuite/libgomp.oacc-c++/routine-1-template.C: Likewise.
* testsuite/libgomp.oacc-c++/routine-1-trailing-return-type.C:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/routine-1.c: Adjust.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@239126 
138bc75d-0d04-0410-961f-82ee72b054a4
---
 libgomp/ChangeLog|  9 +
 libgomp/testsuite/libgomp.oacc-c++/routine-1-auto.C  |  9 +
 libgomp/testsuite/libgomp.oacc-c++/routine-1-template-auto.C |  8 
 .../routine-1-template-trailing-return-type.C|  8 
 libgomp/testsuite/libgomp.oacc-c++/routine-1-template.C  |  8 
 .../libgomp.oacc-c++/routine-1-trailing-return-type.C|  9 +
 libgomp/testsuite/libgomp.oacc-c-c++-common/routine-1.c  | 12 ++--
 7 files changed, 61 insertions(+), 2 deletions(-)

diff --git libgomp/ChangeLog libgomp/ChangeLog
index cc76b7b..70e765e 100644
--- libgomp/ChangeLog
+++ libgomp/ChangeLog
@@ -1,5 +1,14 @@
 2016-08-04  Thomas Schwinge  
 
+   * testsuite/libgomp.oacc-c++/routine-1-auto.C: New file.
+   * testsuite/libgomp.oacc-c++/routine-1-template-auto.C: Likewise.
+   * testsuite/libgomp.oacc-c++/routine-1-template-trailing-return-type.C:
+   Likewise.
+   * testsuite/libgomp.oacc-c++/routine-1-template.C: Likewise.
+   * testsuite/libgomp.oacc-c++/routine-1-trailing-return-type.C:
+   Likewise.
+   * testsuite/libgomp.oacc-c-c++-common/routine-1.c: Adjust.
+
* testsuite/libgomp.oacc-c-c++-common/crash-1.c: Make it a "link"
test, and don't hardcode -O0.
 
diff --git libgomp/testsuite/libgomp.oacc-c++/routine-1-auto.C 
libgomp/testsuite/libgomp.oacc-c++/routine-1-auto.C
new file mode 100644
index 000..f4b54e5
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c++/routine-1-auto.C
@@ -0,0 +1,9 @@
+// Routine with "auto" return type.
+
+// { dg-additional-options "-fno-exceptions" }
+
+#define TEMPLATE
+#define TYPE int
+#define RETURN_1 auto
+#define RETURN_2
+#include "../libgomp.oacc-c-c++-common/routine-1.c"
diff --git libgomp/testsuite/libgomp.oacc-c++/routine-1-template-auto.C 
libgomp/testsuite/libgomp.oacc-c++/routine-1-template-auto.C
new file mode 100644
index 000..444f1f3
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c++/routine-1-template-auto.C
@@ -0,0 +1,8 @@
+// Templated routine with "auto" return type.
+
+// { dg-additional-options "-fno-exceptions" }
+
+#define TEMPLATE template
+#define RETURN_1 auto
+#define RETURN_2
+#include "../libgomp.oacc-c-c++-common/routine-1.c"
diff --git 
libgomp/testsuite/libgomp.oacc-c++/routine-1-template-trailing-return-type.C 
libgomp/testsuite/libgomp.oacc-c++/routine-1-template-trailing-return-type.C
new file mode 100644
index 000..bfe2787
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c++/routine-1-template-trailing-return-type.C
@@ -0,0 +1,8 @@
+// Templated routine using trailing return type syntax.
+
+// { dg-additional-options "-fno-exceptions" }
+
+#define TEMPLATE template
+#define RETURN_1 auto
+#define RETURN_2 -> TYPE
+#include "../libgomp.oacc-c-c++-common/routine-1.c"
diff --git libgomp/testsuite/libgomp.oacc-c++/routine-1-template.C 
libgomp/testsuite/libgomp.oacc-c++/routine-1-template.C
new file mode 100644
index 000..a7e0323
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c++/routine-1-template.C
@@ -0,0 +1,8 @@
+// Templated routine.
+
+// { dg-additional-options "-fno-exceptions" }
+
+#define TEMPLATE template
+#define RETURN_1 TYPE
+#define RETURN_2
+#include "../libgomp.oacc-c-c++-common/routine-1.c"
diff --git libgomp/testsuite/libgomp.oacc-c++/routine-1-trailing-return-type.C 
libgomp/testsuite/libgomp.oacc-c++/routine-1-trailing-return-type.C
new file mode 100644
index 000..3074ba4
--- /dev/null
+++ libgomp/testsuite/libgomp.oacc-c++/routine-1-trailing-return-type.C
@@ -0,0 +1,9 @@
+// Routine using trailing return type syntax.
+
+// { dg-additional-options "-fno-exceptions" }
+
+#define TEMPLATE
+#define TYPE int
+#define RETURN_1 auto
+#define R

Re: [Fortran, Patch, pr70524, v1] [5/6/7 Regression] ICE when using -frepack-arrays -Warray-temporaries

2016-08-04 Thread Dominique d'Humières
The patch works as advertised. It would be nice to have it reviewed and 
committed.

TIA

Dominique



Re: Tighten syntax checking for OpenACC routine construct in C

2016-08-04 Thread Thomas Schwinge
Hi!

On Tue, 24 May 2016 16:02:39 +0200, I wrote:
> Committed without changes in r236639:
> 
> commit c9d624bd2672463771546e73bf3d6446d64e43c0
> Author: tschwinge 
> Date:   Tue May 24 14:00:39 2016 +
> 
> Tighten syntax checking for OpenACC routine construct in C

Backported to gomp-4_0-branch in r239131:

commit 7d092bfdda42f5d3baf53ab3fe7d0ac941bff872
Author: tschwinge 
Date:   Thu Aug 4 13:49:36 2016 +

Tighten syntax checking for OpenACC routine construct in C

Backport trunk r236639:

gcc/c/
* c-parser.c (c_parser_oacc_routine): Tighten syntax checks.
gcc/testsuite/
* c-c++-common/goacc/routine-5.c: Add tests.
* g++.dg/goacc/routine-2.C: Remove duplicate tests.
* gfortran.dg/goacc/routine-6.f90: Add tests.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@239131 
138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/c/ChangeLog.gomp  |  5 +
 gcc/c/c-parser.c  | 19 +--
 gcc/testsuite/ChangeLog.gomp  |  7 +++
 gcc/testsuite/c-c++-common/goacc/routine-5.c  | 21 +
 gcc/testsuite/g++.dg/goacc/routine-2.C|  6 --
 gcc/testsuite/gfortran.dg/goacc/routine-6.f90 |  7 +++
 6 files changed, 45 insertions(+), 20 deletions(-)

diff --git gcc/c/ChangeLog.gomp gcc/c/ChangeLog.gomp
index 65c6bb8..4a4bb24 100644
--- gcc/c/ChangeLog.gomp
+++ gcc/c/ChangeLog.gomp
@@ -1,3 +1,8 @@
+2016-08-04  Thomas Schwinge  
+
+   Backport trunk r236639:
+   * c-parser.c (c_parser_oacc_routine): Tighten syntax checks.
+
 2016-07-15  Cesar Philippidis  
 
Backport from trunk:
diff --git gcc/c/c-parser.c gcc/c/c-parser.c
index 58c14ff..20b31dd 100644
--- gcc/c/c-parser.c
+++ gcc/c/c-parser.c
@@ -14170,25 +14170,24 @@ c_parser_oacc_routine (c_parser *parser, enum 
pragma_context context)
   c_parser_consume_token (parser);
 
   c_token *token = c_parser_peek_token (parser);
-
   if (token->type == CPP_NAME && (token->id_kind == C_ID_ID
  || token->id_kind == C_ID_TYPENAME))
{
  decl = lookup_name (token->value);
  if (!decl)
-   {
- error_at (token->location, "%qE has not been declared",
-   token->value);
- decl = error_mark_node;
-   }
+   error_at (token->location, "%qE has not been declared",
+ token->value);
+ c_parser_consume_token (parser);
}
   else
c_parser_error (parser, "expected function name");
 
-  if (token->type != CPP_CLOSE_PAREN)
-   c_parser_consume_token (parser);
-
-  c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, 0);
+  if (!decl
+ || !c_parser_require (parser, CPP_CLOSE_PAREN, "expected %<)%>"))
+   {
+ c_parser_skip_to_pragma_eol (parser, false);
+ return;
+   }
 }
 
   /* Build a chain of clauses.  */
diff --git gcc/testsuite/ChangeLog.gomp gcc/testsuite/ChangeLog.gomp
index b197955..1b59ffa 100644
--- gcc/testsuite/ChangeLog.gomp
+++ gcc/testsuite/ChangeLog.gomp
@@ -1,3 +1,10 @@
+2016-08-04  Thomas Schwinge  
+
+   Backport trunk r236639:
+   * c-c++-common/goacc/routine-5.c: Add tests.
+   * g++.dg/goacc/routine-2.C: Remove duplicate tests.
+   * gfortran.dg/goacc/routine-6.f90: Add tests.
+
 2016-07-29  Chung-Lin Tang  
 
PR fortran/70598
diff --git gcc/testsuite/c-c++-common/goacc/routine-5.c 
gcc/testsuite/c-c++-common/goacc/routine-5.c
index 2a9db90..1efd154 100644
--- gcc/testsuite/c-c++-common/goacc/routine-5.c
+++ gcc/testsuite/c-c++-common/goacc/routine-5.c
@@ -38,13 +38,26 @@ namespace g {}
 #pragma acc routine /* { dg-error "not followed by" "" { target c++ } } */
 using namespace g;
 
-#pragma acc routine (g) /* { dg-error "does not refer to" "" { target c++ } } 
*/
+#pragma acc routine (g) /* { dg-error "does not refer to a function" "" { 
target c++ } } */
 
-#endif
+#endif /* __cplusplus */
 
-#pragma acc routine (a) /* { dg-error "does not refer to" } */
+#pragma acc routine (a) /* { dg-error "does not refer to a function" } */
   
-#pragma acc routine (c) /* { dg-error "does not refer to" } */
+#pragma acc routine (c) /* { dg-error "does not refer to a function" } */
+
+
+#pragma acc routine () vector /* { dg-error "expected (function 
name|unqualified-id) before .\\). token" } */
+
+#pragma acc routine (+) /* { dg-error "expected (function name|unqualified-id) 
before .\\+. token" } */
+
+
+extern void R1(void);
+extern void R2(void);
+#pragma acc routine (R1, R2, R3) worker /* { dg-error "expected .\\). before 
.,. token" } */
+#pragma acc routine (R1 R2 R3) worker /* { dg-error "expected .\\). before 
.R2." } */
+#pragma acc routine (R1) worker
+#pragma acc routine (R2) worker
 
 
 void Bar ();
diff --git gcc/testsuite/g++.dg/goacc/routine-2.C 
gcc/testsuite/g++.dg/goacc/routine-2.C
index 3a8bbdd

Re: C/C++: Simplify handling of location information for OpenACC routine directives

2016-08-04 Thread Thomas Schwinge
Hi!

On Wed, 13 Jul 2016 11:25:46 +0200, I wrote:
> C/C++: Simplify handling of location information for OpenACC routine 
> directives

Without changes, committed to trunk in r239127:

commit 5f429ee2993ea1795d88c5589251c500e6e9062a
Author: tschwinge 
Date:   Thu Aug 4 13:35:19 2016 +

C/C++: Simplify handling of location information for OpenACC routine 
directives

gcc/c/
* c-parser.c (struct oacc_routine_data): New.
(c_parser_declaration_or_fndef, c_parser_oacc_routine): Use it.
Simplify code.
(c_finish_oacc_routine): Likewise.  Don't attach clauses to "omp
declare target" attribute.
gcc/cp/
* parser.h (struct cp_omp_declare_simd_data): New.
(struct cp_parser): Use it for oacc_routine member.
* parser.c (cp_ensure_no_oacc_routine, cp_parser_oacc_routine)
(cp_parser_late_parsing_oacc_routine, cp_finalize_oacc_routine):
Use it.  Simplify code.
(cp_parser_new): Initialize all members pointing to special
parsing data structures.
(cp_parser_cilk_simd_fn_vector_attrs): Initialize
parser->cilk_simd_fn_info->clauses.
(cp_parser_omp_declare_simd): Initialize
parser->omp_declare_simd->clauses.
(cp_parser_late_parsing_omp_declare_simd): Simplify code.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@239127 
138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/c/ChangeLog  |   8 +
 gcc/c/c-parser.c |  86 ++--
 gcc/cp/ChangeLog |  15 
 gcc/cp/parser.c  | 108 ---
 gcc/cp/parser.h  |  21 ++-
 5 files changed, 126 insertions(+), 112 deletions(-)

diff --git gcc/c/ChangeLog gcc/c/ChangeLog
index 1c0688b..7ef094a 100644
--- gcc/c/ChangeLog
+++ gcc/c/ChangeLog
@@ -1,3 +1,11 @@
+2016-08-04  Thomas Schwinge  
+
+   * c-parser.c (struct oacc_routine_data): New.
+   (c_parser_declaration_or_fndef, c_parser_oacc_routine): Use it.
+   Simplify code.
+   (c_finish_oacc_routine): Likewise.  Don't attach clauses to "omp
+   declare target" attribute.
+
 2016-08-01  Jan Beulich  
 
* c-fold.c (c_fully_fold_internal): Also emit shift count
diff --git gcc/c/c-parser.c gcc/c/c-parser.c
index cc68912..c4a9797 100644
--- gcc/c/c-parser.c
+++ gcc/c/c-parser.c
@@ -1274,11 +1274,17 @@ enum c_parser_prec {
   NUM_PRECS
 };
 
+/* Helper data structure for parsing #pragma acc routine.  */
+struct oacc_routine_data {
+  tree clauses;
+  location_t loc;
+};
+
 static void c_parser_external_declaration (c_parser *);
 static void c_parser_asm_definition (c_parser *);
 static void c_parser_declaration_or_fndef (c_parser *, bool, bool, bool,
   bool, bool, tree *, vec,
-  tree = NULL_TREE);
+  struct oacc_routine_data * = NULL);
 static void c_parser_static_assert_declaration_no_semi (c_parser *);
 static void c_parser_static_assert_declaration (c_parser *);
 static void c_parser_declspecs (c_parser *, struct c_declspecs *, bool, bool,
@@ -1370,7 +1376,7 @@ static bool c_parser_omp_target (c_parser *, enum 
pragma_context, bool *);
 static void c_parser_omp_end_declare_target (c_parser *);
 static void c_parser_omp_declare (c_parser *, enum pragma_context);
 static bool c_parser_omp_ordered (c_parser *, enum pragma_context, bool *);
-static void c_parser_oacc_routine (c_parser *parser, enum pragma_context);
+static void c_parser_oacc_routine (c_parser *, enum pragma_context);
 
 /* These Objective-C parser functions are only ever called when
compiling Objective-C.  */
@@ -1562,7 +1568,8 @@ c_parser_external_declaration (c_parser *parser)
 }
 
 static void c_finish_omp_declare_simd (c_parser *, tree, tree, vec);
-static void c_finish_oacc_routine (c_parser *, tree, tree, bool, bool, bool);
+static void c_finish_oacc_routine (struct oacc_routine_data *, tree, bool,
+  bool, bool);
 
 /* Parse a declaration or function definition (C90 6.5, 6.7.1, C99
6.7, 6.9.1).  If FNDEF_OK is true, a function definition is
@@ -1641,7 +1648,7 @@ c_parser_declaration_or_fndef (c_parser *parser, bool 
fndef_ok,
   bool nested, bool start_attr_ok,
   tree *objc_foreach_object_declaration,
   vec omp_declare_simd_clauses,
-  tree oacc_routine_clauses)
+  struct oacc_routine_data *oacc_routine_data)
 {
   struct c_declspecs *specs;
   tree prefix_attrs;
@@ -1746,9 +1753,9 @@ c_parser_declaration_or_fndef (c_parser *parser, bool 
fndef_ok,
  pedwarn (here, 0, "empty declaration");
}
   c_parser_consume_token (parser);
-  if (oacc_routine_clauses)
-   c_finish_oacc_routine (parser, NULL_TREE,
-  oacc_routine_clauses, fals

Re: Rework C/C++ OpenACC routine parsing

2016-08-04 Thread Thomas Schwinge
Hi!

On Fri, 22 Jul 2016 16:22:18 +0200, Jakub Jelinek  wrote:
> On Wed, Jul 13, 2016 at 04:10:31PM +0200, Thomas Schwinge wrote:
> > @@ -14029,29 +14032,32 @@ c_parser_oacc_kernels_parallel (location_t loc, 
> > c_parser *parser,
> >  static void
> >  c_parser_oacc_routine (c_parser *parser, enum pragma_context context)
> >  {

> >if (c_parser_peek_token (parser)->type == CPP_OPEN_PAREN)
> 
> Can you please change this to
>   if (c_parser_next_token_is (parser, CPP_OPEN_PAREN))
> ?
> Ok for trunk with that change.

With that changed, committed to trunk in r239128:

commit 1fa5d8ba154d3b6a3a0b8233aea4c565a881f312
Author: tschwinge 
Date:   Thu Aug 4 13:35:30 2016 +

Rework C/C++ OpenACC routine parsing

gcc/c/
* c-parser.c (struct oacc_routine_data): Add error_seen and
fndecl_seen members.
(c_finish_oacc_routine): Use these.
(c_parser_declaration_or_fndef): Adjust.
(c_parser_oacc_routine): Likewise.  Support more C language
constructs, and improve diagnostics.  Move pragma context
checking...
(c_parser_pragma): ... here.
gcc/cp/
* parser.c (cp_ensure_no_oacc_routine): Improve diagnostics.
(cp_parser_late_parsing_cilk_simd_fn_info): Fix diagnostics.
(cp_parser_late_parsing_oacc_routine, cp_finalize_oacc_routine):
Simplify code, and improve diagnostics.
(cp_parser_oacc_routine): Likewise.  Move pragma context
checking...
(cp_parser_pragma): ... here.
gcc/testsuite/
* c-c++-common/goacc/routine-5.c: Update.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@239128 
138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/c/ChangeLog  |   9 ++
 gcc/c/c-parser.c | 163 +++---
 gcc/cp/ChangeLog |   8 ++
 gcc/cp/parser.c  | 180 +++-
 gcc/testsuite/ChangeLog  |   4 +
 gcc/testsuite/c-c++-common/goacc/routine-5.c | 199 +++
 6 files changed, 390 insertions(+), 173 deletions(-)

diff --git gcc/c/ChangeLog gcc/c/ChangeLog
index 7ef094a..ecae4f1 100644
--- gcc/c/ChangeLog
+++ gcc/c/ChangeLog
@@ -1,5 +1,14 @@
 2016-08-04  Thomas Schwinge  
 
+   * c-parser.c (struct oacc_routine_data): Add error_seen and
+   fndecl_seen members.
+   (c_finish_oacc_routine): Use these.
+   (c_parser_declaration_or_fndef): Adjust.
+   (c_parser_oacc_routine): Likewise.  Support more C language
+   constructs, and improve diagnostics.  Move pragma context
+   checking...
+   (c_parser_pragma): ... here.
+
* c-parser.c (struct oacc_routine_data): New.
(c_parser_declaration_or_fndef, c_parser_oacc_routine): Use it.
Simplify code.
diff --git gcc/c/c-parser.c gcc/c/c-parser.c
index c4a9797..ec74e0b 100644
--- gcc/c/c-parser.c
+++ gcc/c/c-parser.c
@@ -1276,6 +1276,8 @@ enum c_parser_prec {
 
 /* Helper data structure for parsing #pragma acc routine.  */
 struct oacc_routine_data {
+  bool error_seen; /* Set if error has been reported.  */
+  bool fndecl_seen; /* Set if one fn decl/definition has been seen already.  */
   tree clauses;
   location_t loc;
 };
@@ -1568,8 +1570,7 @@ c_parser_external_declaration (c_parser *parser)
 }
 
 static void c_finish_omp_declare_simd (c_parser *, tree, tree, vec);
-static void c_finish_oacc_routine (struct oacc_routine_data *, tree, bool,
-  bool, bool);
+static void c_finish_oacc_routine (struct oacc_routine_data *, tree, bool);
 
 /* Parse a declaration or function definition (C90 6.5, 6.7.1, C99
6.7, 6.9.1).  If FNDEF_OK is true, a function definition is
@@ -1754,8 +1755,7 @@ c_parser_declaration_or_fndef (c_parser *parser, bool 
fndef_ok,
}
   c_parser_consume_token (parser);
   if (oacc_routine_data)
-   c_finish_oacc_routine (oacc_routine_data, NULL_TREE, false, true,
-  false);
+   c_finish_oacc_routine (oacc_routine_data, NULL_TREE, false);
   return;
 }
 
@@ -1853,7 +1853,7 @@ c_parser_declaration_or_fndef (c_parser *parser, bool 
fndef_ok,
   prefix_attrs = specs->attrs;
   all_prefix_attrs = prefix_attrs;
   specs->attrs = NULL_TREE;
-  for (bool first = true;; first = false)
+  while (true)
 {
   struct c_declarator *declarator;
   bool dummy = false;
@@ -1873,8 +1873,7 @@ c_parser_declaration_or_fndef (c_parser *parser, bool 
fndef_ok,
c_finish_omp_declare_simd (parser, NULL_TREE, NULL_TREE,
   omp_declare_simd_clauses);
  if (oacc_routine_data)
-   c_finish_oacc_routine (oacc_routine_data, NULL_TREE,
-  false, first, false);
+   c_finish_oacc_routine (oacc_routine_data, NULL_TREE, false);
  c_parser_skip_to_end_of_block_or_statement (parser);
  return;
}
@@ -19

Re: Test cases to check OpenACC offloaded function's attributes and classification

2016-08-04 Thread Thomas Schwinge
Hi!

Ping.

On Wed, 27 Jul 2016 10:59:02 +0200, I wrote:
> Hi!
> 
> OK for trunk?
> 
> commit 8200af082db5438be18bc60f721fcf21641c0d86
> Author: Thomas Schwinge 
> Date:   Tue Jul 26 17:18:21 2016 +0200
> 
> Test cases to check OpenACC offloaded function's attributes and 
> classification
> 
>   gcc/testsuite/
>   * c-c++-common/goacc/oaccdevlow-kernels.c: New file.
>   * c-c++-common/goacc/oaccdevlow-parallel.c: Likewise.
>   * c-c++-common/goacc/oaccdevlow-routine.c: Likewise.
>   * gfortran.dg/goacc/oaccdevlow-kernels.f95: Likewise.
>   * gfortran.dg/goacc/oaccdevlow-parallel.f95: Likewise.
>   * gfortran.dg/goacc/oaccdevlow-routine.f95: Likewise.
> ---
>  .../c-c++-common/goacc/oaccdevlow-kernels.c| 34 
>  .../c-c++-common/goacc/oaccdevlow-parallel.c   | 27 
>  .../c-c++-common/goacc/oaccdevlow-routine.c| 29 +
>  .../gfortran.dg/goacc/oaccdevlow-kernels.f95   | 36 
> ++
>  .../gfortran.dg/goacc/oaccdevlow-parallel.f95  | 29 +
>  .../gfortran.dg/goacc/oaccdevlow-routine.f95   | 28 +
>  6 files changed, 183 insertions(+)
> 
> diff --git gcc/testsuite/c-c++-common/goacc/oaccdevlow-kernels.c 
> gcc/testsuite/c-c++-common/goacc/oaccdevlow-kernels.c
> new file mode 100644
> index 000..14d650a
> --- /dev/null
> +++ gcc/testsuite/c-c++-common/goacc/oaccdevlow-kernels.c
> @@ -0,0 +1,34 @@
> +/* Check offloaded function's attributes and classification for OpenACC
> +   kernels.  */
> +
> +/* { dg-additional-options "-O2" }
> +   { dg-additional-options "-fdump-tree-ompexp" }
> +   { dg-additional-options "-fdump-tree-parloops1-all" }
> +   { dg-additional-options "-fdump-tree-oaccdevlow" } */
> +
> +#define N (1024 * 512)
> +
> +extern unsigned int *__restrict a;
> +extern unsigned int *__restrict b;
> +extern unsigned int *__restrict c;
> +
> +void KERNELS ()
> +{
> +#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N])
> +  for (unsigned int i = 0; i < N; i++)
> +c[i] = a[i] + b[i];
> +}
> +
> +/* Check the offloaded function's attributes.
> +   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target 
> entrypoint\\)\\)" 1 "ompexp" } } */
> +
> +/* Check that exactly one OpenACC kernels loop is analyzed, and that it can 
> be
> +   parallelized.
> +   { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 
> "parloops1" } }
> +   { dg-final { scan-tree-dump-times "(?n)oacc function \\(0," 1 "parloops1" 
> } }
> +   { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
> +
> +/* Check the offloaded function's classification and compute dimensions (will
> +   always be [1, 1, 1] for target compilation).
> +   { dg-final { scan-tree-dump-times "(?n)Function is kernels offload" 1 
> "oaccdevlow" } }
> +   { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 
> 1\\\]" 1 "oaccdevlow" } } */
> diff --git gcc/testsuite/c-c++-common/goacc/oaccdevlow-parallel.c 
> gcc/testsuite/c-c++-common/goacc/oaccdevlow-parallel.c
> new file mode 100644
> index 000..63c372a
> --- /dev/null
> +++ gcc/testsuite/c-c++-common/goacc/oaccdevlow-parallel.c
> @@ -0,0 +1,27 @@
> +/* Check offloaded function's attributes and classification for OpenACC
> +   parallel.  */
> +
> +/* { dg-additional-options "-O2" }
> +   { dg-additional-options "-fdump-tree-ompexp" }
> +   { dg-additional-options "-fdump-tree-oaccdevlow" } */
> +
> +#define N (1024 * 512)
> +
> +extern unsigned int *__restrict a;
> +extern unsigned int *__restrict b;
> +extern unsigned int *__restrict c;
> +
> +void PARALLEL ()
> +{
> +#pragma acc parallel loop copyin (a[0:N], b[0:N]) copyout (c[0:N])
> +  for (unsigned int i = 0; i < N; i++)
> +c[i] = a[i] + b[i];
> +}
> +
> +/* Check the offloaded function's attributes.
> +   { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(omp target 
> entrypoint\\)\\)" 1 "ompexp" } } */
> +
> +/* Check the offloaded function's classification and compute dimensions (will
> +   always be [1, 1, 1] for target compilation).
> +   { dg-final { scan-tree-dump-times "(?n)Function is parallel offload" 1 
> "oaccdevlow" } }
> +   { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 
> 1\\\]" 1 "oaccdevlow" } } */
> diff --git gcc/testsuite/c-c++-common/goacc/oaccdevlow-routine.c 
> gcc/testsuite/c-c++-common/goacc/oaccdevlow-routine.c
> new file mode 100644
> index 000..fa2eae7
> --- /dev/null
> +++ gcc/testsuite/c-c++-common/goacc/oaccdevlow-routine.c
> @@ -0,0 +1,29 @@
> +/* Check offloaded function's attributes and classification for OpenACC
> +   routine.  */
> +
> +/* { dg-additional-options "-O2" }
> +   { dg-additional-options "-fdump-tree-ompexp" }
> +   { dg-additional-options "-fdump-tree-oaccdevlow" } */
> +
> +#define N (1024 * 512)
> +
> +extern unsigned int *__restrict a;
> +extern unsigned int *__restrict b;
> +extern unsigned int *__restrict c;
> +#pragma acc declare c

Re: Use "oacc kernels" attribute for OpenACC kernels

2016-08-04 Thread Thomas Schwinge
Hi!

Ping.

On Wed, 27 Jul 2016 12:06:59 +0200, I wrote:
> On Mon, 25 Jan 2016 16:09:14 +0100, Jakub Jelinek  wrote:
> > On Mon, Jan 25, 2016 at 10:06:50AM -0500, Nathan Sidwell wrote:
> > > On 01/04/16 10:39, Nathan Sidwell wrote:
> > > >There's currently no robust predicate to determine whether an oacc 
> > > >offload
> > > >function is for a kernels region (as opposed to a parallel region).
> > > >[...]
> > > >
> > > >This patch marks TREE_PUBLIC on the offload attribute values, to note 
> > > >kernels
> > > >regions,  and adds a predicate to check that.  [...]
> > > >
> > > >Using these predicates improves the dump output of the openacc device 
> > > >lowering
> > > >pass too.
> 
> I just submitted a patch adding "Test cases to check OpenACC offloaded
> function's attributes and classification",
> ,
> to actually check the dump output of "oaccdevlow" -- it works.  ;-)
> 
> > > https://gcc.gnu.org/ml/gcc-patches/2016-01/msg00092.html
> > > ping?
> > 
> > Ok, thanks.
> 
> It's conceptually and code-wise simpler to just use a "oacc kernels"
> attribute for that.  (And, that will make another patch I'm working on
> less convoluted.)
> 
> I'm open to suggestions if there is a better place to set the "oacc
> kernels" attribute -- I put it into expand_omp_target, where another
> special thing for GF_OMP_TARGET_KIND_OACC_KERNELS is already being done,
> and before "rewriting" GF_OMP_TARGET_KIND_OACC_KERNELS (and
> GF_OMP_TARGET_KIND_OACC_PARALLEL) into BUILT_IN_GOACC_PARALLEL.  My
> reasoning for not setting the attribute earlier (like, in the front
> ends), is that at that point in/before expand_omp_target, we still have
> the distinction between OACC_PARALLEL/OACC_KERNELS (tree codes), and
> later GF_OMP_TARGET_KIND_OACC_PARALLEL/GF_OMP_TARGET_KIND_OACC_KERNELS
> (GIMPLE_OMP_TARGET subcodes).  Another question/possibly cleanup of
> course might be to actually do set the "oacc kernels" attribute in the
> front end and merge OACC_KERNELS into OACC_PARALLEL, and
> GF_OMP_TARGET_KIND_OACC_KERNELS into GF_OMP_TARGET_KIND_OACC_PARALLEL?
> 
> But anyway, as a first step: OK for trunk?
> 
> commit 2e6dc8dfd679d8dae814e325afa2547b502827ef
> Author: Thomas Schwinge 
> Date:   Tue Jul 26 17:44:31 2016 +0200
> 
> Use "oacc kernels" attribute for OpenACC kernels
> 
>   gcc/
>   * omp-low.c (expand_omp_target) :
>   Set "oacc kernels" attribute.
>   (set_oacc_fn_attrib): Remove is_kernel formal parameter.  Adjust
>   all users.
>   (oacc_fn_attrib_kernels_p): Remove function.
>   (execute_oacc_device_lower): Look for "oacc kernels" attribute
>   instead of calling oacc_fn_attrib_kernels_p.
>   * tree-ssa-loop.c (gate_oacc_kernels): Likewise.
>   * tree-parloops.c (create_parallel_loop): If oacc_kernels_p,
>   assert "oacc kernels" attribute is set.
> ---
>  gcc/omp-low.c  | 53 
> --
>  gcc/omp-low.h  |  3 +-
>  gcc/tree-parloops.c|  5 +-
>  gcc/tree-ssa-loop.c|  5 +-
>  10 files changed, 34 insertions(+), 48 deletions(-)
> 
> diff --git gcc/omp-low.c gcc/omp-low.c
> index c75452c..a35556d 100644
> --- gcc/omp-low.c
> +++ gcc/omp-low.c
> @@ -12552,11 +12552,10 @@ replace_oacc_fn_attrib (tree fn, tree dims)
>  
>  /* Scan CLAUSES for launch dimensions and attach them to the oacc
> function attribute.  Push any that are non-constant onto the ARGS
> -   list, along with an appropriate GOMP_LAUNCH_DIM tag.  IS_KERNEL is
> -   true, if these are for a kernels region offload function.  */
> +   list, along with an appropriate GOMP_LAUNCH_DIM tag.  */
>  
>  void
> -set_oacc_fn_attrib (tree fn, tree clauses, bool is_kernel, vec *args)
> +set_oacc_fn_attrib (tree fn, tree clauses, vec *args)
>  {
>/* Must match GOMP_DIM ordering.  */
>static const omp_clause_code ids[]
> @@ -12581,9 +12580,6 @@ set_oacc_fn_attrib (tree fn, tree clauses, bool 
> is_kernel, vec *args)
> non_const |= GOMP_DIM_MASK (ix);
>   }
>attr = tree_cons (NULL_TREE, dim, attr);
> -  /* Note kernelness with TREE_PUBLIC.  */
> -  if (is_kernel)
> - TREE_PUBLIC (attr) = 1;
>  }
>  
>replace_oacc_fn_attrib (fn, attr);
> @@ -12652,16 +12648,6 @@ get_oacc_fn_attrib (tree fn)
>return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn));
>  }
>  
> -/* Return true if this oacc fn attrib is for a kernels offload
> -   region.  We use the TREE_PUBLIC flag of each dimension -- only
> -   need to check the first one.  */
> -
> -bool
> -oacc_fn_attrib_kernels_p (tree attr)
> -{
> -  return TREE_PUBLIC (TREE_VALUE (attr));
> -}
> -
>  /* Return level at which oacc routine may spawn a partitioned loop, or
> -1 if it is not a routine (i.e. is an offload fn).  */
>  
> @@ -13044,7 +13030,12 @@ expand_omp_target (str

Re: [PTX] fix worker propagation ICE

2016-08-04 Thread Thomas Schwinge
Hi!

On Wed, 3 Aug 2016 13:30:10 -0400, Nathan Sidwell  wrote:
> --- libgomp/testsuite/libgomp.oacc-c-c++-common/crash-1.c (nonexistent)
> +++ libgomp/testsuite/libgomp.oacc-c-c++-common/crash-1.c (working copy)
> @@ -0,0 +1,28 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O0" } */

Offloading compilation happens at link time not compile time, and in
OpenACC libgomp testing, we're doing a limited set of torture testing
(-O0, -O2), so no point in hardcoding -O0 here.

As obvious, committed to trunk in r239125:

commit ec8d61ace153843dcaaba86ad926f384a4affee3
Author: tschwinge 
Date:   Thu Aug 4 13:34:57 2016 +

Make libgomp.oacc-c-c++-common/crash-1.c a "link" test, and don't hardcode 
-O0

libgomp/
* testsuite/libgomp.oacc-c-c++-common/crash-1.c: Make it a "link"
test, and don't hardcode -O0.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@239125 
138bc75d-0d04-0410-961f-82ee72b054a4
---
 libgomp/ChangeLog | 5 +
 libgomp/testsuite/libgomp.oacc-c-c++-common/crash-1.c | 5 ++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git libgomp/ChangeLog libgomp/ChangeLog
index 850188f..cc76b7b 100644
--- libgomp/ChangeLog
+++ libgomp/ChangeLog
@@ -1,3 +1,8 @@
+2016-08-04  Thomas Schwinge  
+
+   * testsuite/libgomp.oacc-c-c++-common/crash-1.c: Make it a "link"
+   test, and don't hardcode -O0.
+
 2016-08-03  Nathan Sidwell  
 
* testsuite/libgomp.oacc-c-c++-common/crash-1.c: New.
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/crash-1.c 
libgomp/testsuite/libgomp.oacc-c-c++-common/crash-1.c
index a75a817..dcf1485 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/crash-1.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/crash-1.c
@@ -1,7 +1,6 @@
-/* { dg-do compile } */
-/* { dg-options "-O0" } */
+/* { dg-do link } */
 
-/* ICEd in nvptx backend due to unexpected frame size.  */
+/* For -O0, ICEd in nvptx backend due to unexpected frame size.  */
 #pragma acc routine worker
 void
 worker_matmul (int *c, int i)

Backported to gomp-4_0-branch in r239129:

commit bab445509b917c582a53834599f614ce2c29ff36
Author: tschwinge 
Date:   Thu Aug 4 13:49:15 2016 +

Make libgomp.oacc-c-c++-common/crash-1.c a "link" test, and don't hardcode 
-O0

Backport trunk r239125:

libgomp/
* testsuite/libgomp.oacc-c-c++-common/crash-1.c: Make it a "link"
test, and don't hardcode -O0.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@239129 
138bc75d-0d04-0410-961f-82ee72b054a4
---
 libgomp/ChangeLog.gomp| 6 ++
 libgomp/testsuite/libgomp.oacc-c-c++-common/crash-1.c | 5 ++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git libgomp/ChangeLog.gomp libgomp/ChangeLog.gomp
index 7a7d859..4320237 100644
--- libgomp/ChangeLog.gomp
+++ libgomp/ChangeLog.gomp
@@ -1,3 +1,9 @@
+2016-08-04  Thomas Schwinge  
+
+   Backport trunk r239125:
+   * testsuite/libgomp.oacc-c-c++-common/crash-1.c: Make it a "link"
+   test, and don't hardcode -O0.
+
 2016-08-03  Nathan Sidwell  
 
* testsuite/libgomp.oacc-c-c++-common/crash-1.c: New.
diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/crash-1.c 
libgomp/testsuite/libgomp.oacc-c-c++-common/crash-1.c
index a75a817..dcf1485 100644
--- libgomp/testsuite/libgomp.oacc-c-c++-common/crash-1.c
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/crash-1.c
@@ -1,7 +1,6 @@
-/* { dg-do compile } */
-/* { dg-options "-O0" } */
+/* { dg-do link } */
 
-/* ICEd in nvptx backend due to unexpected frame size.  */
+/* For -O0, ICEd in nvptx backend due to unexpected frame size.  */
 #pragma acc routine worker
 void
 worker_matmul (int *c, int i)


Grüße
 Thomas


signature.asc
Description: PGP signature


Re: [PATCH 1/3] (v2) On-demand locations within string-literals

2016-08-04 Thread David Malcolm
On Wed, 2016-08-03 at 09:59 -0600, Jeff Law wrote:
> On 07/29/2016 03:42 PM, Joseph Myers wrote:
> > On Tue, 26 Jul 2016, David Malcolm wrote:
> > 
> > > This patch implements precise tracking of source locations for
> > > the
> > > individual chars within string literals, so that we can e.g.
> > > underline
> > > specific ranges in -Wformat diagnostics.  It handles macros,
> > > concatenated tokens, escaped characters etc.
> > 
> > What if the string literal results from stringizing other tokens
> > (which
> > might have arisen in turn from macro expansion, including expansion
> > of
> > built-in macros not just those defined in source files, etc.)? 
> >  "You don't
> > get precise locations" would be a fine answer for such cases -
> > provided
> > there is good testsuite coverage of them to show they don't crash
> > the
> > compiler or underline nonsensical characters.
> I think losing precise locations in some circumstances would be fine
> as 
> well -- as long as we understand the limitations.

In v3 of the patch, this fails gracefully.

> And, yes, crashing or underlining nonsensical characters would be
> bad, 

The API in input.c is get_source_range_for_substring, which returns an
error message (intended for us, rather than end-users); it is wrapped
by this method in c-common.c:

/* Attempt to determine the source range of the substring.
   If successful, return NULL and write the source range to *OUT_RANGE.
   Otherwise return an error message.  Error messages are intended
   for GCC developers (to help debugging) rather than for end-users.  */

const char *
substring_loc::get_range (source_range *out_range) const

> so it'd be obviously good to test some of that to ensure the
> fallbacks 
> work as expected.

As for test coverage, v2 and v3 of the kit add over a thousand lines of
selftest code that heavily exercise string lexing, using the
 line_table_case machinery to run the tests with various interesting
boundary conditions with line_table (e.g. near
 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES).

In terms of test coverage of the fallbacks, patch 2 of v3 of the kit
directly exercises the substr_loc.get_range in 
gcc.dg/plugin/diagnostic_plugin_test_string_literals.c via
gcc.dg/plugin/diagnostic-test-string-literals-1.c, and some of the
tests there cover the failures, via:

  error_at (strloc, "unable to read substring range: %s", err);

which we wouldn't do in a normal diagnostic (but which is appropriate
for testing the machinery itself).

Patch 3 of the v3 kit adds a format_warning_va function to c-format.c
which is responsible for dealing with failures:
https://gcc.gnu.org/ml/gcc-patches/2016-08/msg00204.html

Sadly the comment got a bit mangled by git in that patch due to the
proximity to the deleted function location_column_from_byte_offset;
here's an inline copy (after patch 4, which adds param
CORRECTED_SUBSTRING for doing fix-it hints for bad format strings):

/* Emit a warning governed by option OPT, using GMSGID as the format
   string and AP as its arguments.

   Attempt to obtain precise location information within a string
   literal from FMT_LOC.

   Case 1: if substring location is available, and is within the range of
   the format string itself, the primary location of the
   diagnostic is the substring range obtained from FMT_LOC, with the
   caret at the *end* of the substring range.

   For example:

 test.c:90:10: warning: problem with '%i' here [-Wformat=]
 printf ("hello %i", msg);
~^

   Case 2: if the substring location is available, but is not within
   the range of the format string, the primary location is that of the
   format string, and a note is emitted showing the substring location.

   For example:
 test.c:90:10: warning: problem with '%i' here [-Wformat=]
 printf("hello " INT_FMT " world", msg);
^
 test.c:19: note: format string is defined here
 #define INT_FMT "%i"
  ~^

   Case 3: if precise substring information is unavailable, the primary
   location is that of the whole string passed to FMT_LOC's constructor.
   For example:

 test.c:90:10: warning: problem with '%i' here [-Wformat=]
 printf(fmt, msg);
^~~

   For each of cases 1-3, if param_range is non-NULL, then it is used
   as a secondary range within the warning.  For example, here it
   is used with case 1:

 test.c:90:16: warning: '%s' here but arg 2 has 'long' type [-Wformat=]
 printf ("foo %s bar", long_i + long_j);
  ~^   ~~~

   and here with case 2:

 test.c:90:16: warning: '%s' here but arg 2 has 'long' type [-Wformat=]
 printf ("foo " STR_FMT " bar", long_i + long_j);
 ^  ~~~
 test.c:89:16: note: format string is defined here
 #define STR_FMT "%s"
  ~^

   and with case 3:

 test.c:90:10: warning: '%i' here, but arg 2 is "const char *' [-Wformat=]
 

[PATCH][PR64971]Convert function pointer to Pmode when emit call

2016-08-04 Thread Renlin Li

Hi all,

In the case of PR64971 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64971),
the compiler ICEs when compiling gcc.c-torture/compile/pr37433.c with 
ilp32 abi.


As we know, in aarch64 ilp32, the ptr_mode is SImode while Pmode is 
still DImode. It means all addresses should be DImode, and the backend 
defines the patterns with this assumption.


The generic part expand_expr_addr_expr () function however generates a
SYMBOL_REF with SImode, it's later used as the address of a MEM rtx 
pattern in a call expression. There is no matching pattern for this 
SImode address, that's why gcc ICEs.

(symbol_ref/f:SI ("*.LC0") [flags 0x82] )

But here, I think what expand_expr_addr_expr does is correct. In this
particular case, expand_expr_addr_expr is not generating an address. 
According to the source code, it's generating a function pointer, and 
later this pointer is used in a call expression. So SImode should be 
right in this case.


The behavior of the test case is, get the address of a piece of memory, 
cast it into a function pointer, and call the function. IIUC, the flow 
is like this:

CALL_EXPR ( NOP_EXPR (ADDR_EXPR ()))

NOP_EXPR here is to convert the address into a function pointer which
should be ptr_mode (SImode). So it's the responsibility of call expander
to convert the pointer into Pmode to create a legal call rtx pattern.

In the test case, there are two functions. The first function generates 
function calls with a SYMBOL_REF as address, the second one generates a 
REG as address. They are all of ptr_mode.
However, prepare_call_address () will convert the REG into Pmode to make 
it as a legal address while SYMBOL_REF is missed. That's why I add the 
code there.


And I want to change the PR64971 into a middle-end issue. The ICE 
manifests in aarch64 target, but I believe this should be a generic 
problem for targets which define ptr_mode different from Pmode.


There is a test case already, so I didn't add one.
aarch64-none-elf regression test Okay, aarch64-linux bootstrap Okay.
But I believe this may not help as the default abi is LP64.

It would be great if you, Andrew, could help run the regression tests in your
aarch64 ilp32 environment.

And I double-checked that the backend fix can be removed without any
problem. It's good to expose middle-end bugs.

Okay for trunk and backport to branch 6?

gcc/ChangeLog:

2016-08-04  Renlin Li  

PR middle-end/64971
* calls.c (prepare_call_address): Convert funexp to Pmode when
necessary.
* config/aarch64/aarch64.md (sibcall): Remove fix for PR 64971.
(sibcall_value): Likewise.
diff --git a/gcc/calls.c b/gcc/calls.c
index c04d00f..b00c153 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -194,10 +194,19 @@ prepare_call_address (tree fndecl_or_type, rtx funexp, rtx static_chain_value,
 	   && targetm.small_register_classes_for_mode_p (FUNCTION_MODE))
 	  ? force_not_mem (memory_address (FUNCTION_MODE, funexp))
 	  : memory_address (FUNCTION_MODE, funexp));
-  else if (! sibcallp)
+  else
 {
-  if (!NO_FUNCTION_CSE && optimize && ! flag_no_function_cse)
-	funexp = force_reg (Pmode, funexp);
+  /* funexp could be a SYMBOL_REF represents a function pointer which is
+	 of ptr_mode.  In this case, it should be converted into address mode
+	 to be a valid address for memory rtx pattern.  See PR 64971.  */
+  if (GET_MODE (funexp) != Pmode)
+	funexp = convert_memory_address (Pmode, funexp);
+
+  if (! sibcallp)
+	{
+	  if (!NO_FUNCTION_CSE && optimize && ! flag_no_function_cse)
+	funexp = force_reg (Pmode, funexp);
+	}
 }
 
   if (static_chain_value != 0
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index f15dd8d..c95258b 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -859,13 +859,6 @@
 	   || aarch64_is_noplt_call_p (callee)))
   XEXP (operands[0], 0) = force_reg (Pmode, callee);
 
-/* FIXME: This is a band-aid.  Need to analyze why expand_expr_addr_expr
-   is generating an SImode symbol reference.  See PR 64971.  */
-if (TARGET_ILP32
-	&& GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF
-	&& GET_MODE (XEXP (operands[0], 0)) == SImode)
-  XEXP (operands[0], 0) = convert_memory_address (Pmode,
-		  XEXP (operands[0], 0));
 if (operands[2] == NULL_RTX)
   operands[2] = const0_rtx;
 
@@ -897,14 +890,6 @@
 	   || aarch64_is_noplt_call_p (callee)))
   XEXP (operands[1], 0) = force_reg (Pmode, callee);
 
-/* FIXME: This is a band-aid.  Need to analyze why expand_expr_addr_expr
-   is generating an SImode symbol reference.  See PR 64971.  */
-if (TARGET_ILP32
-	&& GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
-	&& GET_MODE (XEXP (operands[1], 0)) == SImode)
-  XEXP (operands[1], 0) = convert_memory_address (Pmode,
-		  XEXP (operands[1], 0));
-
 if (operands[3] == NULL_RTX)
   operands[3] = const0_rtx;
 


Re: [PATCH 1/4] Cherry-pick fprofile-generate-atomic from google/gcc-4_9 branch

2016-08-04 Thread Nathan Sidwell

On 08/01/16 09:29, Martin Liška wrote:


I also added a small hunk that describes the problem of an app having not-joined 
(or detached) threads;
can you please take a look at the documentation change? Maybe it would need some 
transformation.


Sorry for the tardy response, thanks for the ping.

In general good.   Some nits:


+++ b/gcc/tree-profile.c
@@ -164,7 +164,12 @@ gimple_init_edge_profiler (void)
  = build_function_type_list (void_type_node,
  gcov_type_ptr, gcov_type_node,
  NULL_TREE);
-  tree_one_value_profiler_fn
+  if (flag_profile_update == PROFILE_UPDATE_ATOMIC)
+   tree_one_value_profiler_fn
+ = build_fn_decl ("__gcov_one_value_profiler_atomic",
+one_value_profiler_fn_type);
+  else
+   tree_one_value_profiler_fn
  = build_fn_decl ("__gcov_one_value_profiler",
 one_value_profiler_fn_type);

this hunk uses a different idiom to ...

@@ -180,11 +185,14 @@ gimple_init_edge_profiler (void)
  gcov_type_node,
  ptr_void,
  NULL_TREE);
+  const char *profiler_fn_name = "__gcov_indirect_call_profiler_v2";
+  if (PARAM_VALUE (PARAM_INDIR_CALL_TOPN_PROFILE))
+   profiler_fn_name = "__gcov_indirect_call_topn_profiler";
+  if (flag_profile_update == PROFILE_UPDATE_ATOMIC)
+   profiler_fn_name = "__gcov_indirect_call_profiler_v2_atomic";

I prefer the latter's approach.

@@ -241,22 +249,37 @@ gimple_init_edge_profiler (void)
 void
 gimple_gen_edge_profiler (int edgeno, edge e)
...
+  else
+{
/* COMMENT thread unsafe sequence */
+  tree ref = tree_coverage_counter_ref (GCOV_COUNTER_ARCS, edgeno);


diff --git a/libgcc/libgcov-profiler.c b/libgcc/libgcov-profiler.c
+static inline void
+__gcov_one_value_profiler_body_atomic (gcov_type *counters, gcov_type value)
+{
...

The body looks to have data races.  Some kind of cmp_store needed on 
counters[1]?  Maybe it can't be completely race free?


nathan


[GCC Steering Committee attention] [PING] [PING] [PING] libgomp: In OpenACC testing, cycle though $offload_targets, and by default only build for the offload target that we're actually going to test

2016-08-04 Thread Thomas Schwinge
Hi!

Ping.

It has now been more than three months (!) that I first submitted this,
without receiving any meaningful review.  (Apart from one initial
"deprecative" comment by Jakub, which I then repeatedly detailed on,
without receiving any further response.)
.

I suppose, if I weren't paid for this, I would have run away
long ago, and would have looked for another project to contribute to.
:-(

I'm CCing the GCC Steering Committee here -- not necessarily only because
of this one patch, but generally, I think it's a bad situation that
apparently Jakub (who I acknowledge is always very busy with all kinds of
tasks) has de facto become the single reviewer of
OpenACC/OpenMP/offloading patches.  I'm certainly not going in any way to
disapprove Jakub's help, skills and experience, but I'm more and more
worried about this "bus factor" of one single person
().

As I'm unable (huh?) to find an email address to reach the GCC Steering
Committee, I'm CCing  as suggested on
, and David Edelsohn, who seems to
handle most of the visible communication of the GCC Steering Committee.

When raising a similar concern months ago, the answer given (by Jakub
himself, if I remember correctly) was that in addition to him, all
Global Reviewers are welcome to review OpenACC/OpenMP/offloading patches.
But that doesn't help if that's then not happening in reality.  (With the
exception of Bernd, who then did review such patches for a while, but
also seems to have stopped with that again.)

To the best of my knowledge, I'm following the procedures correctly
(please tell me if I'm not!), including pinging patches regularly (once a
week, typically).

Help.  I'm just trying to contribute my share for keeping GCC alive and
relevant.  :-)

On Wed, 27 Jul 2016 18:03:32 +0200, I wrote:
> Ping.
> 
> On Wed, 20 Jul 2016 13:52:20 +0200, I wrote:
> > Ping.
> > 
> > On Wed, 13 Jul 2016 12:37:07 +0200, I wrote:
> > > As discussed before, "offloading compilation is slow; I suppose because
> > > of having to invoke several tools (LTO streaming -> mkoffload -> offload
> > > compilers, assemblers, linkers -> combine the resulting images; but I
> > > have not done a detailed analysis on that)".  For this reason it is
> > > beneficial (that is, it is measurable in libgomp testing wall time) to
> > > limit offload compilation to the one (in the OpenACC case) offload target
> > > that we're actually going to test (that is, execute).  Another reason is
> > > that -foffload=-fdump-tree-[...] produces clashes (that is,
> > > unpredictable outcome) in the file names of offload compilations' dump
> > > files' names.  Here is a patch to implement that, to specify
> > > -foffload=[...] during libgomp OpenACC testing.  As that has been
> > > challenged before:
> > > 
> > > | [...] there actually is a difference between offload_plugins and
> > > | offload_targets (for example, "intelmic"
> > > | vs. "x86_64-intelmicemul-linux-gnu"), and I'm using both variables --
> > > | to avoid having to translate the more specific
> > > | "x86_64-intelmicemul-linux-gnu" (which we required in the test harness)
> > > | into the less specific "intelmic" (for plugin loading) in
> > > | libgomp/target.c.  I can do that, so that we can continue to use just a
> > > | single offload_targets variable, but I consider that a less elegant
> > > | solution.
> > > 
> > > OK for trunk?
> > > 
> > > commit 5fdb515826769ebb36bc5c49a3ffac4d17a8a589
> > > Author: Thomas Schwinge 
> > > Date:   Wed Jul 13 11:37:16 2016 +0200
> > > 
> > > libgomp: In OpenACC testing, cycle though $offload_targets, and by 
> > > default only build for the offload target that we're actually going to 
> > > test
> > > 
> > >   libgomp/
> > >   * plugin/configfrag.ac: Enumerate both offload plugins and 
> > > offload
> > >   targets.
> > >   (OFFLOAD_PLUGINS): Renamed from OFFLOAD_TARGETS.
> > >   * target.c (gomp_target_init): Adjust to that.
> > >   * testsuite/lib/libgomp.exp: Likewise.
> > >   (offload_targets_s, offload_targets_s_openacc): Remove 
> > > variables.
> > >   (offload_target_to_openacc_device_type): New proc.
> > >   (check_effective_target_openacc_nvidia_accel_selected)
> > >   (check_effective_target_openacc_host_selected): Examine
> > >   $openacc_device_type instead of $offload_target_openacc.
> > >   * Makefile.in: Regenerate.
> > >   * config.h.in: Likewise.
> > >   * configure: Likewise.
> > >   * testsuite/Makefile.in: Likewise.
> > >   * testsuite/libgomp.oacc-c++/c++.exp: Cycle through
> > >   $offload_targets (plus "disable") instead of
> > >   $offload_targets_s_openacc, and add "-foffload=$offload_target" 
> > > to
> > >   tagopt.
> > >   * testsuite/libgomp.oacc-c/c.exp: Likewi

Re: libgo patch committed: Update to 1.7rc3

2016-08-04 Thread Ian Lance Taylor
On Thu, Aug 4, 2016 at 5:12 AM, Rainer Orth  
wrote:
> Hi Uros,
>
>>> I have committed a patch to update libgo to the 1.7rc3 release
>>> candidate.  This is very close to the upcoming 1.7 release.  As usual
>>> with libgo updates, the patch is too large to include in this e-mail
>>> message.  I've appended the changes to the gccgo-specific directories.
>>
>> There is an issue with
>>
>> libgo/go/crypto/sha1/issue15617_test.go.
>>
>> The test crypto/sha1 fails on alpha-linux-gnu with:
>>
>> --- FAIL: TestOutOfBoundsRead (0.00s)
>> panic: invalid argument [recovered]
>> panic: invalid argument
>> ...
>>
>> since the test hard-codes 4k pages, but alpha uses 8k pages.
>>
>> It looks like the second line of build directives in the test:
>>
>> // +build amd64
>> // +build linux darwin
>>
>> overwrites the first one, so the test runs also on non-amd64
>> architecture linux OS. I have confirmed this by removing the second
>> build directive, and crypto/sha1 test then passed, since
>> issue15617_test.go was not linked into the final executable.
>>
>> Another possible solution is to avoid hard-coding 4k pages in the
>> test. The mentioned test will pass on alpha when
>>
>> const pageSize = 4 << 10
>>
>> is changed to
>>
>> const pageSize = 8 << 10
>
> just FTR, I've been seeing the same failure on Solaris/SPARC, which
> uses 8k pages.

That test was only supposed to be run on x86_64.  It was a bug that it
was run on any other target.  I committed a patch yesterday to fix
that, so you should no longer see this test failure.

Ian


Re: [PATCH] gcov tool: Implement Hawick's algorithm for cycle detection, (PR gcov-profile/67992)

2016-08-04 Thread Martin Liška
On 08/04/2016 03:15 PM, Nathan Sidwell wrote:
> On 08/04/16 06:41, Martin Liška wrote:
>> On 08/03/2016 04:22 PM, Nathan Sidwell wrote:
>>> Martin,
 As I've been going through all PRs related to gcov-profile, I've noticed this 
 PR.
 The current implementation of cycle detection in gcov is very poor, leading to 
 extreme run time
 for cases like the one mentioned in the PR (which does not contain a cycle). Thanks 
 to Joshua, I've
 grabbed his patch and removed the scaffolding (classes: Arc, Block, ...) 
 he did. After doing that
 the patch is quite subtle and fast (of course).
>>>
>>> sorry to be a pain, but could you split the patch into
>>> a) formatting changes
>>> b) the clever  bits
>>>
>>> the formatting changes can then (probably) be applied as obvious.
>>>
>>> nathan
>>
>> This is the second part, which is the change of the loop detection algorithm.
> 
> typedefs for arc and block pointer vectors would be useful to add.  They're 
> used in a lot of  places:
> 
> typedef vector<arc_t *> arc_vector_t;
> typedef vector<block_t *> block_vector_t;
> 
> (question, should those be  'const T *' template parms?)

const block_t works for me, arc_t doesn't:
../../gcc/gcov.c:470:27: error: assignment of member ‘arc_info::cs_count’ in 
read-only object
 edges[i]->cs_count -= cycle_count;


> 
> No need for vector of block vectors typedef, unless you think otherwise.
> 
> +/* Flag that drives cycle detection after a negative cycle is seen.  */
> +static bool did_negate = false;
> 
> That's ugly, and I think unnecessary.  Use +1 for loop, -1 for negated loop, 
> 0 for no loop  (or a tri-valued enum with the right properties)
> 
> 1) have handle_cycle return +1 (not negated) or -1 (negated) appropriately.
> 
> 2) have circuit return an int similarly. Then
>   if (w == start)
> found |= handle_cycle (path, count);
>   else if (...)
> found |= circuit (...)
> will DTRT there
> 
> 3) finally have find_cycles merge the results from its circuit calls and 
> determine whether to repeat itself -- rather than have the caller do it. (or 
> have another reference parm to tell the caller?)

I decided to use a new enum, hope it's better?

Martin

> 
> nathan
> 

>From 24cd47f44e9958bd7fd0c40af849cedc567d6341 Mon Sep 17 00:00:00 2001
From: marxin 
Date: Thu, 4 Aug 2016 12:34:08 +0200
Subject: [PATCH] gcov tool: Implement Hawick's algorithm for cycle detection,
 (PR gcov-profile/67992)

gcc/ChangeLog:

2016-08-04  Martin Liska  
	Joshua Cranmer  

	* gcov.c (line_t::has_block): New function.
	(enum loop_type): New enum.
	(handle_cycle): New function.
	(unblock): Likewise.
	(circuit): Likewise.
	(get_cycles_count): Likewise.
	(accumulate_line_counts): Use new loop detection algorithm.
---
 gcc/gcov.c | 290 -
 1 file changed, 189 insertions(+), 101 deletions(-)

diff --git a/gcc/gcov.c b/gcc/gcov.c
index 40701a1..9c9eccf 100644
--- a/gcc/gcov.c
+++ b/gcc/gcov.c
@@ -41,6 +41,11 @@ along with Gcov; see the file COPYING3.  If not see
 
 #include 
 
+#include 
+#include 
+
+using namespace std;
+
 #define IN_GCOV 1
 #include "gcov-io.h"
 #include "gcov-io.c"
@@ -222,6 +227,9 @@ typedef struct coverage_info
 
 typedef struct line_info
 {
+  /* Return true when NEEDLE is one of basic blocks the line belongs to.  */
+  bool has_block (block_t *needle);
+
   gcov_type count;	   /* execution count */
   union
   {
@@ -235,6 +243,16 @@ typedef struct line_info
   unsigned unexceptional : 1;
 } line_t;
 
+bool
+line_t::has_block (block_t *needle)
+{
+  for (block_t *n = u.blocks; n; n = n->chain)
+if (n == needle)
+  return true;
+
+  return false;
+}
+
 /* Describes a file mentioned in the block graph.  Contains an array
of line info.  */
 
@@ -407,6 +425,167 @@ static void release_structures (void);
 static void release_function (function_t *);
 extern int main (int, char **);
 
+/* Cycle detection!
+   There are a bajillion algorithms that do this.  Boost's function is named
+   hawick_cycles, so I used the algorithm by K. A. Hawick and H. A. James in
+   "Enumerating Circuits and Loops in Graphs with Self-Arcs and Multiple-Arcs"
+   (url at ).
+
+   The basic algorithm is simple: effectively, we're finding all simple paths
+   in a subgraph (that shrinks every iteration).  Duplicates are filtered by
+   "blocking" a path when a node is added to the path (this also prevents non-
+   simple paths)--the node is unblocked only when it participates in a cycle.
+   */
+
+typedef vector<arc_t *> arc_vector_t;
+typedef vector<const block_t *> block_vector_t;
+
+/* Enum with types of loop in CFG.  */
+
+enum loop_type
+{
+  NO_LOOP,
+  LOOP,
+  NEGATIVE_LOOP
+};
+
+/* Handle cycle identified by EDGES, where the function finds minimum cs_count
+   and subtract the value from all counts.  The subtracted value is added
+   to COUNT.  Returns type of loop.  */
+
+static loop_type
+handle_cycle (const arc_vector_t &edges, int64_t &count)
+{
+  /*

[PING] Use correct location information for OpenACC shape and simple clauses in C/C++

2016-08-04 Thread Thomas Schwinge
Hi!

On Wed, 27 Jul 2016 17:09:38 -0400, David Malcolm  wrote:
> On Wed, 2016-07-27 at 17:17 +0200, Thomas Schwinge wrote:
> > I found that for a lot of OpenACC (and potentially also OpenMP)
> > clauses
> > (in C/C++ front ends; didn't look at Fortran), we use wrong location
> > information.  The problem is that
> > c_parser_oacc_all_clauses/c_parser_omp_all_clauses calls
> > cp_parser_omp_clause_name to determine the pragma_omp_clause c_kind,
> > and
> > that function (as documented) consumes the clause token before
> > returning.
> > So, when we then do "c_parser_peek_token (parser)->location" or
> > similar
> > in some clause parsing function, that will return the location
> > information of the token _after_ the clause token, which -- at least
> > very
> > often -- is not desirable, in particular if that location information
> > is
> > used then in a build_omp_clause call, which should point to the
> > clause
> > token itself, and not whatever follows after that.
> > 
> > Probably that all went unnoticed for so long, because the GCC
> > testsuite
> > largely is running with -fno-diagnostics-show-caret, so we don't
> > visually
> > see the wrong location information (and nobody pays attention to the
> > column information as given, for example, as line 2, column 32 in
> > "[...]/c-c++-common/goacc/routine-2.c:2:32: error: [...]".
> 
> > There seems to be a lot of inconsistency in that in all the clause
> > parsing; here is just a first patch to fix the immediate problem I've
> > been observing.  OK for trunk already, or need to clean this all up
> > in
> > one go?  Do we need this on release branches, as a "quality of
> > implementation" fix (wrong diagnostic locations)?

> > [initial patch]

Ping for that one.


> I'm not a reviewer for the C/C++ FEs so I can't really review this
> patch

I think in your position as a maintainer for "diagnostic messages", you
should be qualified to exercise that status to approve such a patch.  :-)


> but it might be nice in this (or in a followup) to add some test
> cases for this that explicitly test the caret information for some of
> these errors.  [...]

> Hope this is constructive

It certainly is, thanks!  In fact, I had planned to look up how to do
that, which you've now made simpler by providing a specific recipe.


Grüße
 Thomas


signature.asc
Description: PGP signature


Re: [PATCH/AARCH64] Add ThunderX vector cost model

2016-08-04 Thread Richard Earnshaw (lists)
On 03/08/16 23:42, Andrew Pinski wrote:
> Hi,
>   This patch adds to the thunderx model, the vector cost model.  I
> benchmarked this on SPEC CPU INT 2006 and got a small speed up.  I
> have a few more cost model patches that I am going to upstream but they
> are going to be split up.
> 
> OK?  Bootstrapped and tested on aarch64-linux-gnu with no regressions.
> 

OK.

R.

> Thanks,
> Andrew Pinski
> 
> ChangeLog:
> * config/aarch64/aarch64.c (thunderx_vector_cost): New variable.
> (thunderx_tunings): Use thunderx_vector_cost instead of generic_vector_cost.
> 
> 
> addthunderxcost.diff.txt
> 
> 
> Index: config/aarch64/aarch64.c
> ===
> --- config/aarch64/aarch64.c  (revision 239098)
> +++ config/aarch64/aarch64.c  (working copy)
> @@ -376,6 +376,24 @@ static const struct cpu_vector_cost gene
>1 /* cond_not_taken_branch_cost  */
>  };
>  
> +/* ThunderX costs for vector insn classes.  */
> +static const struct cpu_vector_cost thunderx_vector_cost =
> +{
> +  1, /* scalar_stmt_cost  */
> +  3, /* scalar_load_cost  */
> +  1, /* scalar_store_cost  */
> +  4, /* vec_stmt_cost  */
> +  4, /* vec_permute_cost  */
> +  2, /* vec_to_scalar_cost  */
> +  2, /* scalar_to_vec_cost  */
> +  3, /* vec_align_load_cost  */
> +  10, /* vec_unalign_load_cost  */
> +  10, /* vec_unalign_store_cost  */
> +  1, /* vec_store_cost  */
> +  3, /* cond_taken_branch_cost  */
> +  3 /* cond_not_taken_branch_cost  */
> +};
> +
>  /* Generic costs for vector insn classes.  */
>  static const struct cpu_vector_cost cortexa57_vector_cost =
>  {
> @@ -677,7 +695,7 @@ static const struct tune_params thunderx
>&thunderx_extra_costs,
>&generic_addrcost_table,
>&thunderx_regmove_cost,
> -  &generic_vector_cost,
> +  &thunderx_vector_cost,
>&generic_branch_cost,
>&generic_approx_modes,
>6, /* memmov_cost  */
> 



Re: [PATCH], Improve vector int/long initialization on PowerPC

2016-08-04 Thread Segher Boessenkool
Hi Mike,

On Thu, Aug 04, 2016 at 12:33:44AM -0400, Michael Meissner wrote:
> I built spec 2006 with these patches on a little endian power8 system, and at
> least 18 of the benchmarks had vector initializations replaced.  Most
> benchmarks only used the initialization in a few places, but gamess, dealII,
> h264ref, and wrf each had over 100 initializations changed.

Did performance change?

> I have tried these patches on a big endian power7 system (both 32-bit and
> 64-bit targets), on a big endian power8 system (just 64-bit targets), and a
> little endian power8 system (just 64-bit targets).  There were no regressions
> on any of the systems.  Can I install these patches to the trunk?

Some questions below, okay for trunk with those taken care of.  Thanks.


> --- gcc/config/rs6000/rs6000.c
> (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000)
> (revision 239098)
> +++ gcc/config/rs6000/rs6000.c(.../gcc/config/rs6000) (working copy)
> @@ -6736,6 +6736,38 @@ rs6000_expand_vector_init (rtx target, r
>return;
>  }
>  
> +  /* Special case initializing vector int if we are on 64-bit systems with
> + direct move.  This bug tickles a bug in reload for fortran's
> + cray_pointers_2 test unless -mvsx-timode is enabled.  */

"This bug"?  It's not clear to me what this says, could you rephrase?
Just say what the code does, not what would happen without the code.  Or
say both.

> +static inline int
> +regno_or_subregno (rtx op)
> +{
> +  if (REG_P (op))
> +return REGNO (op);
> +  else if (SUBREG_P (op))
> +return subreg_regno (op);
> +  else
> +gcc_unreachable ();
> +}

Maybe this should check the subreg is lowpart, too?  For robustness.

>  ;; Build a V2DF/V2DI vector from two scalars
>  (define_insn "vsx_concat_"
> -  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=,?")
> +  [(set (match_operand:VSX_D 0 "gpc_reg_operand" "=,we")
>   (vec_concat:VSX_D
> -  (match_operand: 1 "vsx_register_operand" ",")
> -  (match_operand: 2 "vsx_register_operand" 
> ",")))]
> +  (match_operand: 1 "gpc_reg_operand" ",r")
> +  (match_operand: 2 "gpc_reg_operand" ",r")))
> +   (clobber (match_scratch:DI 3 "=X,X"))]

X,X?  How is that useful?

> +   (set_attr "length" "4")])

One insn is the default.


Segher


Re: [RS6000] rs6000_preferred_reload_class

2016-08-04 Thread Segher Boessenkool
On Thu, Aug 04, 2016 at 02:44:34PM +0930, Alan Modra wrote:
> On Wed, Aug 03, 2016 at 03:30:53PM -0400, Michael Meissner wrote:
> > On Mon, Aug 01, 2016 at 09:00:43AM +0930, Alan Modra wrote:
> > > Hi Mike,
> > > I've been looking at a lot of reload/lra code lately in trying to fix
> > > pr71680, and noticed a change to rs6000_preferred_reload_class.
> > > 
> > > In https://gcc.gnu.org/ml/gcc-patches/2016-05/msg00134.html you made
> > > preferred_reload_class pick a register class when given NO_REGS as an
> > > argument.  That seems odd to me given the usual behaviour of
> > > preferred_reload_class is to restrict reg classes.  Did you mean to
> > > make that change?  I'm wondering whether something you were playing
> > > with escaped upstream, because there is no ChangeLog for it as far as
> > > I can see..
> > 
> > I was playing with that, but given the patch was meant to be complex only, I
> > suspect we should try taking it out now, and see if it affects anything.
> 
> Now bootstrapped and regression tested powerpc64le-linux and
> powerpc64-linux.  OK to apply?

Okay, thanks,


Segher


>   * config/rs6000/rs6000.c (rs6000_preferred_reload_class): Delete
>   code accidentally committed 2016-05-02 providing class when given
>   NO_REGS.


Re: [PATCH] gcov tool: Implement Hawick's algorithm for cycle detection, (PR gcov-profile/67992)

2016-08-04 Thread Nathan Sidwell

On 08/04/16 10:42, Martin Liška wrote:


I decided to use a new enum, hope it's better?


that's fine.  But you know, if you set the enum values appropriately you could 
use the | trick rather than the compare you've done (c++ enum type safety would 
require an overloaded | operator though).  I don't mind either way,



+get_cycles_count (line_t &linfo, bool handle_negative_cycles = true)
...
+  for (block_t *block = linfo.u.blocks; block; block = block->chain)
+{
+  arc_vector_t path;
+  block_vector_t blocked;
+  vector<block_vector_t> block_lists;
+  result = circuit (block, path, block, blocked, block_lists, linfo, 
count);
+}
+
+  /* If we have a negative cycle, repeat the find_cycles routine.  */
+  if (result == NEGATIVE_LOOP && handle_negative_cycles)
+count += get_cycles_count (linfo, false);

The retry will depend on the result of the final call of circuit.  Before, it 
happened if any loop was negated.  Is this change intentional?


nathan


Re: protected alloca class for malloc fallback

2016-08-04 Thread Aldy Hernandez

On 08/04/2016 08:57 AM, Richard Biener wrote:

On Thu, Aug 4, 2016 at 1:30 PM, Aldy Hernandez  wrote:

Howdy!

As part of my -Walloca-larger-than=life work, I've been running said pass
over gcc, binutils, and gdb, and trying to fix things along the way.

Particularly irritating and error prone is having to free malloc'd pointers
on every function exit point.  We end up with a lot of:

foo(size_t len)
{
   void *p, *m_p = NULL;
   if (len < HUGE)
 p = alloca(len);
   else
 p = m_p = malloc(len);
   if (something)
 goto out;
   stuff();
out:
   free (m_p);
}

...which nobody really likes.

I've been thinking that for GCC we could have a protected_alloca class whose
destructor frees any malloc'd memory:

void foo()
{
   char *p;
   protected_alloca chunk(5);
   p = (char *) chunk.pointer();
   f(p);
}

This would generate:

void foo() ()
{
   void * _3;

   :
   _3 = malloc (5);
   f (_3);

   :
   free (_3); [tail call]
   return;
}

Now the problem with this is that the memory allocated by chunk is freed
when it goes out of scope, which may not be what you want.  For example:

  func()
  {
char *str;
{
  protected_alloca chunk ();
  // malloc'd pointer will be freed when chunk goes out of scope.
  str = (char *) chunk.pointer ();
}
use (str);  // BAD!  Use after free.
  }


But how's that an issue if the chunk is created at the exact place where there
previously was an alloca?


The pointer can escape if you assign it to a variable outside the scope 
of chunk?  Take for instance the following snippet in tree.c:


{
...
...
  q = (char *) alloca (9 + 17 + len + 1);
  memcpy (q, file, len + 1);

  snprintf (q + len, 9 + 17 + 1, "_%08X_" HOST_WIDE_INT_PRINT_HEX,
crc32_string (0, name), get_random_seed (false));

  p = q;
}

clean_symbol_name (q);

If you define `protected_alloca chunk(9 + 17 + len + 1)' at the alloca() 
call, chunk will be destroyed at the "}", whereas `q' is still being 
used outside of that scope.


What I am suggesting for this escaping case is to define 
"protected_alloca chunk()" at function scope, and then do chunk.alloc(N) 
in the spot where the alloca() call was previously at.


Or am I missing something?



Your class also will not work when internal_alloc is not inlined and
the alloca path
is taken like when using non-GCC host compilers.


Does not work, or is not optimal?  Because defining _ALLOCA_INLINE_ to 
nothing and forcing no-inline with:


g++ -c b.cc -fno-inline -fdump-tree-all  -O1 -fno-exceptions

I still see correct code.  It's just that it's inefficient, which we 
shouldn't care about because bootstrap fixes the non-GCC inlining problem :).


  protected_alloca chunk(123);
  str = (char *) chunk.pointer();
  use(str);

becomes:

  :
  protected_alloca::protected_alloca (&chunk, 123);
  str_3 = protected_alloca::pointer (&chunk);
  use (str_3);
  protected_alloca::~protected_alloca (&chunk);
  return;

What am I missing?




In the attached patch implementing this class I have provided another idiom
for avoiding this problem:

  func()
  {
void *ptr;
protected_alloca chunk;
{
  chunk.alloc (999);
  str = (char *) chunk.pointer ();
}
// OK, pointer will be freed on function exit.
use (str);
  }

So I guess it's between annoying gotos and keeping track of multiple exit
points to a function previously calling alloca, or making sure the
protected_alloca object always resides in the scope where the memory is
going to be used.

Is there a better blessed C++ way?  If not, is this OK?


It looks like you want to replace _all_ alloca uses?  What's the point
in doing this
at all?  Just to be able to enable the warning during bootstrap?


Well, it did cross my mind to nix anything that had 0 bounds checks, but 
I was mostly interested in things like this:


gcc.c:
  temp = env.get ("COMPILER_PATH");
  if (temp)
{
  const char *startp, *endp;
  char *nstore = (char *) alloca (strlen (temp) + 3);

I was just providing a generic interface for dealing with these cases in 
the future, instead of gotoing my way out of it.




Having the conditional malloc/alloca will also inhibit optimization like eliding
the malloc or alloca calls completely.


If we can elide the alloca, we can surely elide a conditional alloca / 
malloc pair, can't we?


Aldy


[PATCH v2,rs6000] Add built-in function support for Power9 binary floating point operations

2016-08-04 Thread Kelvin Nilsen
This patch adds built-in support for the following fourteen new binary
floating point instructions introduced with the Power9 architecture: 

VSX Scalar Extract Exponent Double-Precision
VSX Scalar Extract Significand Double-Precision
VSX Scalar Insert Exponent Double-Precision
VSX Scalar Compare Exponents Double-Precision
VSX Scalar Test Data Class Double-Precision
VSX Scalar Test Data Class Single-Precision
VSX Vector Extract Exponent Double-Precision
VSX Vector Extract Exponent Single-Precision
VSX Vector Extract Significand Double-Precision
VSX Vector Extract Significand Single-Precision
VSX Vector Insert Exponent Double-Precision
VSX Vector Insert Exponent Single-Precision
VSX Vector Test Data Class Double-Precision
VSX Vector Test Data Class Single-Precision

This patch differs from the first version of this patch in the
following ways:

1. Added semicolons where they were missing in extend.texi 
   descriptions of new built-in functions.
2. Reformatted the word wrap and indentation on some code
   displays within extend.texi.
3. Removed various trailing spaces in extend.texi and in 
   source files.
4. Removed a dg-skip-if dejagnu directive from 50 test
   files and placed this functionality into the corresponding
   bfp.exp file instead.
5. Improved comments describing multiple dejagnu test programs.
6. In vsx.md, removed newly introduced VSX_SF mode iterator and
   replaced all uses with existing SFDF mode iterator.
7. In vsx.md, removed newly introduced vsx_sf_suffix mode
   attributes and replaced with existing Fvsx mode attributes.
8. In vsx.md, removed VSX_F_INTEGER mode attributes and replaced
   with VSI.
9. In vsx.md, removed vsx_f_suffix mode attributes and replaced with 
   VSs.
10. Fixed indentation and line wrap and use of spaces and tabs for 
several define_insn and define_expand directives.
11. Fixed the attributes associated with new define_insn directives.
12. Added an operand predicate on the new xscmpexpdp insn.
13. In rs6000-c.c, moved the declaration of the unsupported_builtin
variable closer to its usage.
14. In rs6000-builtin.def, replaced spaces with tabs in the argument
lists for multiple BU_P9V_VSX_2 macro expansions.
15. In predicates.md, rewrote the new u7bit_cint_operand predicate
to use IN_RANGE and INTVAL.

Thank you to Segher Boessenkool and Pat Haugen for helpful review
feedback.

The patch has been bootstrapped and tested on powerpc64le-unknown-linux
and on powerpc64-unknown-linux (big-endian) with no regressions.  Is
this ok for the trunk?

gcc/ChangeLog:

2016-08-04  Kelvin Nilsen  

* config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add
overloaded binary floating point functions.
(altivec_resolve_overloaded_builtin): Improve error messages to
distinguish between functions not supported in the current
compiler configuration and functions that were invoked with an
invalid parameter combination, and include the built-in function
name in both error messages.
* config/rs6000/rs6000-builtin.def (BU_P9V_64BIT_VSX_1): New macro
for power9 built-ins.
(BU_P9V_VSX_2): Likewise.
(BU_P9V_64BIT_VSX_2): Likewise.
(VSEEDP): Add scalar extract exponent support.
(VSEESP): Add scalar extract signature support.
(VSTDCNDP): Add scalar test negative support.
(VSTDCNSP): Likewise.
(VSIEDP): Add scalar insert exponent support.
(VSCEDPGT): Add scalar compare exponent greater than support.
(VSCEDPLT): Add scalar compare exponent less than support.
(VSCEDPEQ): Add scalar compare exponent test-for-equality support.
(VSCEDPUO): Add scalar compare exponent test-for-unordered support.
(VSTDCDP): Add scalar test data class support.
(VSTDCSP): Likewise.
(VSEEDP): Add overload support for scalar extract exponent
operation.
(VSESDP): Add overload support for scalar extract signature
operation.
(VSTDCN): Add overload support for scalar test negative
operation.
(VSTDCNDP): Add overload support for scalar test negative
operation.
(VSTDCNSP): Add overload support for scalar test negative
operation.
(VSIEDP): Add overload support for scalar insert exponent
operation.
(VSTDC): Add overload support for scalar test data class
operation.
(VSTDCDP): Add overload support for scalar test data class
operation.
(VSTDCSP): Add overload support for scalar test data class
operation.
(VSCEDPGT): Add overload support for scalar compare exponent
greater than operation.
(VSCEDPLT): Add overload support for scalar compare exponent
less than operation.
(VSCEDPEQ): Add overload support for scalar compare exponent
test-for-equality operation.
(VSCEDPUO): Add overload support for scalar compare exponent
test-for-unordered o

Re: [PATCH][AArch64] Add legitimize_address_displacement hook

2016-08-04 Thread Richard Earnshaw (lists)
On 04/08/16 12:00, Wilco Dijkstra wrote:
> This patch adds legitimize_address_displacement hook so that stack accesses
> with large offsets are split into a more efficient sequence.  Byte and 
> halfword
> accesses use a 4KB range, wider accesses use a 16KB range to maximise the
> available addressing range and increase opportunities to share the base 
> address.
> 
> int f(int x)
> {
>   int arr[8192];
>   arr[4096] = 0;
>   arr[6000] = 0;
>   arr[7000] = 0;
>   arr[8191] = 0;
>   return arr[x];
> }
> 
> Now generates:
> 
>   sub sp, sp, #32768
>   add x1, sp, 16384
>   str wzr, [x1]
>   str wzr, [x1, 7616]
>   str wzr, [x1, 11616]
>   str wzr, [x1, 16380]
>   ldr w0, [sp, w0, sxtw 2]
>   add sp, sp, 32768
>   ret
> 
> instead of:
> 
>   sub sp, sp, #32768
>   mov x2, 28000
>   add x1, sp, 16384
>   mov x3, 32764
>   str wzr, [x1]
>   mov x1, 24000
>   add x1, sp, x1
>   str wzr, [x1]
>   add x1, sp, x2
>   str wzr, [x1]
>   add x1, sp, x3
>   str wzr, [x1]
>   ldr w0, [sp, w0, sxtw 2]
>   add sp, sp, 32768
>   ret
> 
> Bootstrap, GCC regression OK.

OK.  But please enhance the comment with some explanation as to WHY
you've chosen to use just two base pairings rather than separate bases
for each access size.

R.

> 
> ChangeLog:
> 2016-08-04  Wilco Dijkstra  
> 
> gcc/
>   * config/aarch64/aarch64.c (aarch64_legitimize_address_displacement):
>   New function.
>   (TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT): Define.
> --
> 
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 
> a0e7680ad0946a27d95a67a9892bb7e264a90451..7bf12475494fb004f5a92445ae31fdc52af43c3b
>  100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -4132,6 +4132,19 @@ aarch64_legitimate_address_p (machine_mode mode, rtx x,
>return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
>  }
>  
> +/* Split an out-of-range address displacement into a base and offset.
> +   Use 4KB range for 1- and 2-byte accesses and a 16KB range otherwise.  */
> +
> +static bool
> +aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode 
> mode)
> +{
> +  HOST_WIDE_INT mask = GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3fff;
> +
> +  *off = GEN_INT (INTVAL (*disp) & ~mask);
> +  *disp = GEN_INT (INTVAL (*disp) & mask);
> +  return true;
> +}
> +
>  /* Return TRUE if rtx X is immediate constant 0.0 */
>  bool
>  aarch64_float_const_zero_rtx_p (rtx x)
> @@ -14096,6 +14109,10 @@ aarch64_optab_supported_p (int op, machine_mode 
> mode1, machine_mode,
>  #undef TARGET_LEGITIMATE_CONSTANT_P
>  #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
>  
> +#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
> +#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
> +  aarch64_legitimize_address_displacement
> +
>  #undef TARGET_LIBGCC_CMP_RETURN_MODE
>  #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
> 
> 



Re: [PATCH 1/4] Cherry-pick fprofile-generate-atomic from google/gcc-4_9 branch

2016-08-04 Thread Martin Liška
On 08/04/2016 04:48 PM, Nathan Sidwell wrote:
> diff --git a/libgcc/libgcov-profiler.c b/libgcc/libgcov-profiler.c
> +static inline void
> +__gcov_one_value_profiler_body_atomic (gcov_type *counters, gcov_type value)
> +{
> ...
> 
> The body looks to have data races.  Some kind of cmp_store needed on 
> counters[1]?  Maybe it can't be completely race free?
> 
> nathan

You are right, as we would need to atomically change 2 values (counters[0] and 
counters[1]),
it's impossible IMHO. The question is what to do about that:

1) atomically update just counters[2] and live with data racing for the first 2 
values
2) add (probably conditionally) a spin lock
3) do not handle thread-safety of indirect call counters at all

Thoughts?
Martin


Re: [PING] Use correct location information for OpenACC shape and simple clauses in C/C++

2016-08-04 Thread David Malcolm
On Thu, 2016-08-04 at 16:54 +0200, Thomas Schwinge wrote:
> Hi!
> 
> On Wed, 27 Jul 2016 17:09:38 -0400, David Malcolm <
> dmalc...@redhat.com> wrote:
> > On Wed, 2016-07-27 at 17:17 +0200, Thomas Schwinge wrote:
> > > I found that for a lot of OpenACC (and potentially also OpenMP)
> > > clauses
> > > (in C/C++ front ends; didn't look at Fortran), we use wrong
> > > location
> > > information.  The problem is that
> > > c_parser_oacc_all_clauses/c_parser_omp_all_clauses calls
> > > cp_parser_omp_clause_name to determine the pragma_omp_clause
> > > c_kind,
> > > and
> > > that function (as documented) consumes the clause token before
> > > returning.
> > > So, when we then do "c_parser_peek_token (parser)->location" or
> > > similar
> > > in some clause parsing function, that will return the location
> > > information of the token _after_ the clause token, which -- at
> > > least
> > > very
> > > often -- is not desirable, in particular if that location
> > > information
> > > is
> > > used then in a build_omp_clause call, which should point to the
> > > clause
> > > token itself, and not whatever follows after that.
> > > 
> > > Probably that all went unnoticed for so long, because the GCC
> > > testsuite
> > > largely is running with -fno-diagnostics-show-caret, so we don't
> > > visually
> > > see the wrong location information (and nobody pays attention to
> > > the
> > > > column information as given, for example, as line 2, column 32 in
> > > > "[...]/c-c++-common/goacc/routine-2.c:2:32: error: [...]").
> > 
> > > There seems to be a lot of inconsistency in that in all the
> > > clause
> > > parsing; here is just a first patch to fix the immediate problem
> > > I've
> > > been observing.  OK for trunk already, or need to clean this all
> > > up
> > > in
> > > one go?  Do we need this on release branches, as a "quality of
> > > implementation" fix (wrong diagnostic locations)?
> 
> > > [initial patch]
> 
> Ping for that one.
> 
> 
> > I'm not a reviewer for the C/C++ FEs so I can't really review this
> > patch
> 
> I think in your position as a maintainer for "diagnostic messages",
> you
> should be qualified to exercise that status to approve such a patch. 
>  :-)

I don't know exactly where the boundaries of that role are; I've been
assuming it means anything relating to the diagnostic subsystem itself
(and location-tracking), as opposed to *usage* of the system.  The
patch in question is more about the latter.  That said, your patch
looks very reasonable to me (but as I mentioned, a test case
demonstrating the improved caret locations would be good).

Steering committee: am I being too conservative in my interpretation of
that role?

> 
> > but it might be nice in this (or in a followup) to add some test
> > cases for this that explicitly test the caret information for some
> > of
> > these errors.  [...]
> 
> > Hope this is constructive
> 
> It certainly is, thanks!  In fact, I had planned to look up how to do
> that, which you've now made simpler by providing a specific recipe.
> 
> 
> Grüße
>  Thomas


C++ PATCH for c++/72796 (wrong resolution of scoped call)

2016-08-04 Thread Jason Merrill
On this testcase, finish_class_member_access_expr unpacked the
SCOPE_REF, changing the name variable, and then used that variable to
return a dependent COMPONENT_REF.  We are already holding onto the
original value of name, we should use that instead.

Tested x86_64-pc-linux-gnu, applying to trunk.
commit c3d8ecaeee56d1d7009a6484db66f84ef6031a66
Author: Jason Merrill 
Date:   Wed Aug 3 17:20:31 2016 -0400

PR c++/72796 - wrong resolution of scoped method call.

* typeck.c (finish_class_member_access_expr): Avoid stripping
SCOPE_REF to dependent base.

diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
index d4bfb11..78d443b 100644
--- a/gcc/cp/typeck.c
+++ b/gcc/cp/typeck.c
@@ -2683,7 +2683,7 @@ finish_class_member_access_expr (cp_expr object, tree 
name, bool template_p,
{
dependent:
  return build_min_nt_loc (UNKNOWN_LOCATION, COMPONENT_REF,
-  orig_object, name, NULL_TREE);
+  orig_object, orig_name, NULL_TREE);
}
   object = build_non_dependent_expr (object);
 }
diff --git a/gcc/testsuite/g++.dg/template/dependent-base2.C 
b/gcc/testsuite/g++.dg/template/dependent-base2.C
new file mode 100644
index 000..b418832
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/dependent-base2.C
@@ -0,0 +1,18 @@
+// PR c++/72796
+// { dg-do compile { target c++11 } }
+
+struct a;
+template  struct b { typedef a c; };
+struct d {
+  void e(int);
+};
+struct a : d {
+  void e(int) = delete;
+};
+template  struct g : b::c {
+  g(int) { this->d::e(0); }
+};
+struct h : g<0> {
+  using i = g;
+  h() : i(0) {}
+};


C++ PATCH to rename TYPE_ANONYMOUS_P to TYPE_UNNAMED_P

2016-08-04 Thread Jason Merrill
Martin pointed out that the macro name TYPE_ANONYMOUS_P is confusing
because it doesn't identify anonymous structs/unions; the C++ standard
consistently refers to classes with no name as "unnamed", not
"anonymous", so let's use that term internally as well.

Tested x86_64-pc-linux-gnu, applying to trunk.
commit 9d6463c81dbccd54537e60544117bf233f6af3a8
Author: Jason Merrill 
Date:   Wed Aug 3 07:45:04 2016 -0400

Rename TYPE_ANONYMOUS_P to TYPE_UNNAMED_P.

* cp-tree.h (TYPE_UNNAMED_P): Rename from TYPE_ANONYMOUS_P.
(TYPE_WAS_UNNAMED): Rename from TYPE_WAS_ANONYMOUS.
* class.c, decl.c, decl2.c, error.c, lambda.c, mangle.c,
name-lookup.c, parser.c, pt.c, semantics.c, tree.c: Adjust.

diff --git a/gcc/cp/class.c b/gcc/cp/class.c
index b537b7e..f834965 100644
--- a/gcc/cp/class.c
+++ b/gcc/cp/class.c
@@ -3077,11 +3077,11 @@ finish_struct_anon_r (tree field, bool complain)
 the TYPE_DECL that we create implicitly.  You're
 allowed to put one anonymous union inside another,
 though, so we explicitly tolerate that.  We use
-TYPE_ANONYMOUS_P rather than ANON_AGGR_TYPE_P so that
+TYPE_UNNAMED_P rather than ANON_AGGR_TYPE_P so that
 we also allow unnamed types used for defining fields.  */
   if (DECL_ARTIFICIAL (elt)
  && (!DECL_IMPLICIT_TYPEDEF_P (elt)
- || TYPE_ANONYMOUS_P (TREE_TYPE (elt
+ || TYPE_UNNAMED_P (TREE_TYPE (elt
continue;
 
   if (TREE_CODE (elt) != FIELD_DECL)
@@ -6718,7 +6718,7 @@ find_flexarrays (tree t, flexmems_t *fmem)
   tree fldtype = TREE_TYPE (fld);
   if (TREE_CODE (fld) != TYPE_DECL
  && RECORD_OR_UNION_TYPE_P (fldtype)
- && TYPE_ANONYMOUS_P (fldtype))
+ && TYPE_UNNAMED_P (fldtype))
{
  /* Members of anonymous structs and unions are treated as if
 they were members of the containing class.  Descend into
@@ -6798,7 +6798,7 @@ diagnose_flexarrays (tree t, const flexmems_t *fmem)
 {
   /* Members of anonymous structs and unions are considered to be members
  of the containing struct or union.  */
-  if (TYPE_ANONYMOUS_P (t) || !fmem->array)
+  if (TYPE_UNNAMED_P (t) || !fmem->array)
 return;
 
   const char *msg = 0;
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 76616c6..d6fb387 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -761,7 +761,7 @@ struct GTY (()) tree_trait_expr {
   enum cp_trait_kind kind;
 };
 
-/* Based off of TYPE_ANONYMOUS_P.  */
+/* Based off of TYPE_UNNAMED_P.  */
 #define LAMBDA_TYPE_P(NODE) \
   (CLASS_TYPE_P (NODE) && CLASSTYPE_LAMBDA_EXPR (NODE))
 
@@ -1553,7 +1553,7 @@ enum languages { lang_c, lang_cplusplus, lang_java };
 #define TYPE_NAME_LENGTH(NODE) (IDENTIFIER_LENGTH (TYPE_IDENTIFIER (NODE)))
 
 /* Nonzero if NODE has no name for linkage purposes.  */
-#define TYPE_ANONYMOUS_P(NODE) \
+#define TYPE_UNNAMED_P(NODE) \
   (OVERLOAD_TYPE_P (NODE) && anon_aggrname_p (TYPE_LINKAGE_IDENTIFIER (NODE)))
 
 /* The _DECL for this _TYPE.  */
@@ -4116,7 +4116,8 @@ more_aggr_init_expr_args_p (const 
aggr_init_expr_arg_iterator *iter)
 
 /* Define fields and accessors for nodes representing declared names.  */
 
-#define TYPE_WAS_ANONYMOUS(NODE) (LANG_TYPE_CLASS_CHECK (NODE)->was_anonymous)
+/* Nonzero if TYPE is an unnamed class with a typedef for linkage purposes.  */
+#define TYPE_WAS_UNNAMED(NODE) (LANG_TYPE_CLASS_CHECK (NODE)->was_anonymous)
 
 /* C++: all of these are overloaded!  These apply only to TYPE_DECLs.  */
 
@@ -4270,8 +4271,8 @@ more_aggr_init_expr_args_p (const 
aggr_init_expr_arg_iterator *iter)
equivalent to `struct S {}; typedef struct S S;' in C.
DECL_IMPLICIT_TYPEDEF_P will hold for the typedef indicated in this
example.  In C++, there is a second implicit typedef for each
-   class, in the scope of `S' itself, so that you can say `S::S'.
-   DECL_SELF_REFERENCE_P will hold for that second typedef.  */
+   class, called the injected-class-name, in the scope of `S' itself, so that
+   you can say `S::S'.  DECL_SELF_REFERENCE_P will hold for that typedef.  */
 #define DECL_IMPLICIT_TYPEDEF_P(NODE) \
   (TREE_CODE (NODE) == TYPE_DECL && DECL_LANG_FLAG_2 (NODE))
 #define SET_DECL_IMPLICIT_TYPEDEF_P(NODE) \
diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index d2ba7ba..04a0df6 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -4614,7 +4614,7 @@ check_tag_decl (cp_decl_specifier_seq *declspecs,
 }
   /* Check for an anonymous union.  */
   else if (declared_type && RECORD_OR_UNION_CODE_P (TREE_CODE (declared_type))
-  && TYPE_ANONYMOUS_P (declared_type))
+  && TYPE_UNNAMED_P (declared_type))
 {
   /* 7/3 In a simple-declaration, the optional init-declarator-list
 can be omitted only when declaring a class (clause 9) or
@@ -10773,7 +10773,7 @@ grokdeclarator (const cp_declarator *declarator,
  && unqualified_id
  && TYPE_NAME (type)
  && TREE_CODE (TYPE_NAME (type)) ==

Re: [PATCH][AArch64] Improve stack adjustment

2016-08-04 Thread Richard Earnshaw (lists)
On 04/08/16 12:06, Wilco Dijkstra wrote:
> Improve stack adjustment by reusing a temporary move immediate 
> from the prolog if the register is still valid in the epilog.  This generates
> smaller code for leaf functions:
> 
> mov x16, 4
> sub sp, sp, x16
> ldr w0, [sp, w0, sxtw 2]
> add sp, sp, x16
> ret
> 
> Passes GCC regression tests.
> 
> ChangeLog:
> 2016-08-04  Wilco Dijkstra  
> 
> gcc/
>   * config/aarch64/aarch64.c (aarch64_add_constant):
>   Add extra argument to allow emitting the move immediate.
>   Use add/sub with positive immediate.
>   (aarch64_expand_epilogue): Decide when to leave out move.
> 
> testsuite/
>   * gcc.target/aarch64/test_frame_17.c: New test.
> --
> 

I see you've added a default argument for your new parameter.  I think
doing that is fine, but I have two comments about how we might use that
in this case.

Firstly, if this parameter is suitable for having a default value, then
I think the preceding one should also be treated in the same way.

Secondly, I think (due to these parameters being BOOL with no useful
context to make it clear which is which) that having wrapper functions
(inlined, of course) that describe the intended behaviour more clearly
would be useful.  So, defining

static inline void
aarch64_add_frame_constant (mode, regnum, scratchreg, delta)
{
   aarch64_add_constant (mode, regnum, scratchreg, delta, true);
}

would make it clearer at the call point than having a lot of true and
false parameters scattered round the code.

Alternatively we might remove all the default parameters and require
wrappers like the above to make it more explicit which API is intended -
this might make more sense if not all combinations make sense.

R.

> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 
> ce2cc5ae3e1291f4ef4a8408461678c9397b06bd..5b59e4dd157351f301fc563a724cefe8a9be132c
>  100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -1941,15 +1941,21 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
>  }
>  
>  /* Add DELTA to REGNUM in mode MODE.  SCRATCHREG can be used to held
> -   intermediate value if necessary.
> +   intermediate value if necessary.  FRAME_RELATED_P should be true if
> +   the RTX_FRAME_RELATED flag should be set and CFA adjustments added
> +   to the generated instructions.  If SCRATCHREG is known to hold
> +   abs (delta), EMIT_MOVE_IMM can be set to false to avoid emitting the
> +   immediate again.
>  
> -   This function is sometimes used to adjust the stack pointer, so we must
> -   ensure that it can never cause transient stack deallocation by writing an
> -   invalid value into REGNUM.  */
> +   Since this function may be used to adjust the stack pointer, we must
> +   ensure that it cannot cause transient stack deallocation (for example
> +   by first incrementing SP and then decrementing when adjusting by a
> +   large immediate).  */
>  
>  static void
>  aarch64_add_constant (machine_mode mode, int regnum, int scratchreg,
> -   HOST_WIDE_INT delta, bool frame_related_p)
> +   HOST_WIDE_INT delta, bool frame_related_p,
> +   bool emit_move_imm = true)
>  {
>HOST_WIDE_INT mdelta = abs_hwi (delta);
>rtx this_rtx = gen_rtx_REG (mode, regnum);
> @@ -1967,11 +1973,11 @@ aarch64_add_constant (machine_mode mode, int regnum, 
> int scratchreg,
>return;
>  }
>  
> -  /* We need two add/sub instructions, each one performing part of the
> - calculation.  Don't do this if the addend can be loaded into register 
> with
> - a single instruction, in that case we prefer a move to a scratch 
> register
> - following by an addition.  */
> -  if (mdelta < 0x100 && !aarch64_move_imm (delta, mode))
> +  /* We need two add/sub instructions, each one perform part of the
> + addition/subtraction, but don't this if the addend can be loaded into
> + register by single instruction, in that case we prefer a move to scratch
> + register following by addition.  */
> +  if (mdelta < 0x100 && !aarch64_move_imm (mdelta, mode))
>  {
>HOST_WIDE_INT low_off = mdelta & 0xfff;
>  
> @@ -1985,8 +1991,10 @@ aarch64_add_constant (machine_mode mode, int regnum, 
> int scratchreg,
>  
>/* Otherwise use generic function to handle all other situations.  */
>rtx scratch_rtx = gen_rtx_REG (mode, scratchreg);
> -  aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (delta), true, mode);
> -  insn = emit_insn (gen_add2_insn (this_rtx, scratch_rtx));
> +  if (emit_move_imm)
> +aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (mdelta), true, 
> mode);
> +  insn = emit_insn (delta < 0 ? gen_sub2_insn (this_rtx, scratch_rtx)
> +   : gen_add2_insn (this_rtx, scratch_rtx));
>if (frame_related_p)
>  {
>RTX_FRAME_RELATED_P (insn) = frame_related_p;
> @@ -3288,7 +3296,8 @@ aarch

C++ PATCH for c++/72415 (concepts ICE with fold-expression constraint)

2016-08-04 Thread Jason Merrill
Substituting into the fold-expression was producing an
EXPR_PACK_EXPANSION, but it would be better to keep it as a
fold-expression.  tsubst_unary_left_fold et al try to do this, but
weren't recognizing the case where tsubst_pack_expansion returns a
TREE_VEC containing a single pack expansion.  In that case, let's just
return the pack expansion without the TREE_VEC, as we do earlier in
tsubst_pack_expansion for similar situations.

Tested x86_64-pc-linux-gnu, applying to trunk and 6.
commit 994ccc9de1117b6388859663cb9817e20ed66c50
Author: Jason Merrill 
Date:   Wed Aug 3 19:19:32 2016 -0400

PR c++/72415 - member template with fold-expression constraint

* pt.c (tsubst_pack_expansion): Pull a single pack expansion out
of the TREE_VEC.

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index bf729ea..60c87e0 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -11160,6 +11160,12 @@ tsubst_pack_expansion (tree t, tree args, 
tsubst_flags_t complain,
   local_specializations = saved_local_specializations;
 }
   
+  /* If the dependent pack arguments were such that we end up with only a
+ single pack expansion again, there's no need to keep it in a TREE_VEC.  */
+  if (len == 1 && TREE_CODE (result) == TREE_VEC
+  && PACK_EXPANSION_P (TREE_VEC_ELT (result, 0)))
+return TREE_VEC_ELT (result, 0);
+
   return result;
 }
 
diff --git a/gcc/testsuite/g++.dg/concepts/memfun2.C 
b/gcc/testsuite/g++.dg/concepts/memfun2.C
new file mode 100644
index 000..c186a18
--- /dev/null
+++ b/gcc/testsuite/g++.dg/concepts/memfun2.C
@@ -0,0 +1,21 @@
+// PR c++/72415
+// { dg-options "-std=c++1z -fconcepts" }
+
+template
+struct indices {};
+
+template
+struct foo_type {
+template
+static void impl(indices)
+requires (... && (Indices, true));
+
+static auto caller()
+{ return impl(indices<0, 1, 2> {}); }
+};
+
+int main()
+{
+// internal compiler error: in satisfy_predicate_constraint, at 
cp/constraint.cc:2013
+foo_type::caller();
+}


Re: [PATCH] gcov tool: Implement Hawick's algorithm for cycle detection, (PR gcov-profile/67992)

2016-08-04 Thread Martin Liška
On 08/04/2016 05:13 PM, Nathan Sidwell wrote:
> On 08/04/16 10:42, Martin Liška wrote:
> 
>> I decided to use a new enum, hope it's better?
> 
> that's fine.  But you know, if you set the enum values appropriately you 
> could use the | trick rather than the compare you've done (c++ enum type 
> safety would require an overloaded | operator though).  I don't mind either 
> way,

Yeah, I decided to use enum + operator|.

> 
> 
> +get_cycles_count (line_t &linfo, bool handle_negative_cycles = true)
> ...
> +  for (block_t *block = linfo.u.blocks; block; block = block->chain)
> +{
> +  arc_vector_t path;
> +  block_vector_t blocked;
> +  vector block_lists;
> +  result = circuit (block, path, block, blocked, block_lists, linfo, 
> count);
> +}
> +
> +  /* If we have a negative cycle, repeat the find_cycles routine.  */
> +  if (result == NEGATIVE_LOOP && handle_negative_cycles)
> +count += get_cycles_count (linfo, false);
> 
> The retry will depend on the result of the final call of circuit.  Before, it 
> happened if any loop was negated.  Is this change intentional?

That's not intentional, fixed in the new version.
May I install the patch?

Martin

> 
> nathan

>From 4517ea775ff041c8f37faff76b637fd671f269e3 Mon Sep 17 00:00:00 2001
From: marxin 
Date: Thu, 4 Aug 2016 12:34:08 +0200
Subject: [PATCH] gcov tool: Implement Hawick's algorithm for cycle detection,
 (PR gcov-profile/67992)

gcc/ChangeLog:

2016-08-04  Martin Liska  
	Joshua Cranmer  

	* gcov.c (line_t::has_block): New function.
	(enum loop_type): New enum.
	(handle_cycle): New function.
	(unblock): Likewise.
	(circuit): Likewise.
	(get_cycles_count): Likewise.
	(accumulate_line_counts): Use new loop detection algorithm.
---
 gcc/gcov.c | 291 -
 1 file changed, 190 insertions(+), 101 deletions(-)

diff --git a/gcc/gcov.c b/gcc/gcov.c
index 40701a1..b1ab6e5 100644
--- a/gcc/gcov.c
+++ b/gcc/gcov.c
@@ -41,6 +41,11 @@ along with Gcov; see the file COPYING3.  If not see
 
 #include 
 
+#include 
+#include 
+
+using namespace std;
+
 #define IN_GCOV 1
 #include "gcov-io.h"
 #include "gcov-io.c"
@@ -222,6 +227,9 @@ typedef struct coverage_info
 
 typedef struct line_info
 {
+  /* Return true when NEEDLE is one of basic blocks the line belongs to.  */
+  bool has_block (block_t *needle);
+
   gcov_type count;	   /* execution count */
   union
   {
@@ -235,6 +243,16 @@ typedef struct line_info
   unsigned unexceptional : 1;
 } line_t;
 
+bool
+line_t::has_block (block_t *needle)
+{
+  for (block_t *n = u.blocks; n; n = n->chain)
+if (n == needle)
+  return true;
+
+  return false;
+}
+
 /* Describes a file mentioned in the block graph.  Contains an array
of line info.  */
 
@@ -407,6 +425,168 @@ static void release_structures (void);
 static void release_function (function_t *);
 extern int main (int, char **);
 
+/* Cycle detection!
+   There are a bajillion algorithms that do this.  Boost's function is named
+   hawick_cycles, so I used the algorithm by K. A. Hawick and H. A. James in
+   "Enumerating Circuits and Loops in Graphs with Self-Arcs and Multiple-Arcs"
+   (url at ).
+
+   The basic algorithm is simple: effectively, we're finding all simple paths
+   in a subgraph (that shrinks every iteration).  Duplicates are filtered by
+   "blocking" a path when a node is added to the path (this also prevents non-
+   simple paths)--the node is unblocked only when it participates in a cycle.
+   */
+
+typedef vector arc_vector_t;
+typedef vector block_vector_t;
+
+/* Enum with types of loop in CFG.  */
+
+enum loop_type
+{
+  NO_LOOP,
+  LOOP,
+  NEGATIVE_LOOP
+};
+
+/* Loop_type operator that merges two values: A and B.  */
+
+inline loop_type& operator |= (loop_type& a, loop_type b)
+{
+return a = static_cast (a | b);
+}
+
+/* Handle cycle identified by EDGES, where the function finds minimum cs_count
+   and subtract the value from all counts.  The subtracted value is added
+   to COUNT.  Returns type of loop.  */
+
+static loop_type
+handle_cycle (const arc_vector_t &edges, int64_t &count)
+{
+  /* Find the minimum edge of the cycle, and reduce all nodes in the cycle by
+ that amount.  */
+  int64_t cycle_count = INT64_MAX;
+  for (unsigned i = 0; i < edges.size (); i++)
+{
+  int64_t ecount = edges[i]->cs_count;
+  if (cycle_count > ecount)
+	cycle_count = ecount;
+}
+  count += cycle_count;
+  for (unsigned i = 0; i < edges.size (); i++)
+edges[i]->cs_count -= cycle_count;
+
+  return cycle_count < 0 ? NEGATIVE_LOOP : LOOP;
+}
+
+/* Unblock a block U from BLOCKED.  Apart from that, iterate all blocks
+   blocked by U in BLOCK_LISTS.  */
+
+static void
+unblock (const block_t *u, block_vector_t &blocked,
+	 vector &block_lists)
+{
+  block_vector_t::iterator it = find (blocked.begin (), blocked.end (), u);
+  if (it == blocked.end ())
+return;
+
+  unsigned index

Re: [PR55641] drop spurious const_type from reference_type variables

2016-08-04 Thread Jason Merrill
OK.

On Mon, Aug 1, 2016 at 11:12 PM, Alexandre Oliva  wrote:
> Although C++ reference types, denoted by DW_TAG_reference_type in
> DWARFv2+ debug info, are unchangeable, we output names of reference type
> with DW_TAG_const_type, because internally we mark such variables as
> TREE_READONLY.  That's an internal implementation detail that shouldn't
> leak to debug information.  This patch fixes this.
>
> The testcase is slightly changed from the one attached to the bug
> report, so that it runs in C++98 mode too.
>
> Regstrapped on x86_64-linux-gnu and i686-linux-gnu.  Ok to install?
>
> for  gcc/ChangeLog
>
> PR debug/55641
> * dwarf2out.c (decl_quals): Don't map TREE_READONLY to
> TYPE_QUAL_CONST in reference-typed decls.
>
> for  gcc/testsuite/ChangeLog
>
> PR debug/55641
> * g++.dg/debug/dwarf2/ref-1.C: New.
> ---
>  gcc/dwarf2out.c   |4 
>  gcc/testsuite/g++.dg/debug/dwarf2/ref-1.C |   19 +++
>  2 files changed, 23 insertions(+)
>  create mode 100644 gcc/testsuite/g++.dg/debug/dwarf2/ref-1.C
>
> diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
> index 8d6eeed..103095f 100644
> --- a/gcc/dwarf2out.c
> +++ b/gcc/dwarf2out.c
> @@ -11135,6 +11135,10 @@ static int
>  decl_quals (const_tree decl)
>  {
>return ((TREE_READONLY (decl)
> +  /* The C++ front-end correctly marks reference-typed
> + variables as readonly, but from a language (and debug
> + info) standpoint they are not const-qualified.  */
> +  && TREE_CODE (TREE_TYPE (decl)) != REFERENCE_TYPE
>? TYPE_QUAL_CONST : TYPE_UNQUALIFIED)
>   | (TREE_THIS_VOLATILE (decl)
>  ? TYPE_QUAL_VOLATILE : TYPE_UNQUALIFIED));
> diff --git a/gcc/testsuite/g++.dg/debug/dwarf2/ref-1.C 
> b/gcc/testsuite/g++.dg/debug/dwarf2/ref-1.C
> new file mode 100644
> index 000..75e9fca
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/debug/dwarf2/ref-1.C
> @@ -0,0 +1,19 @@
> +// { dg-do compile }
> +// { dg-options "-O -g -dA -gno-strict-dwarf" }
> +// { dg-final { scan-assembler-not "DW_TAG_const_type" { xfail { 
> powerpc-ibm-aix* } } } }
> +
> +int x;
> +int &y = x;
> +
> +typedef int &z_t;
> +z_t z = x;
> +
> +void f(int &p) {}
> +
> +struct foo {
> +  int &bar;
> +  typedef int &bart;
> +  bart fool;
> +};
> +
> +void f3(struct foo &p) {}
>
> --
> Alexandre Oliva, freedom fighterhttp://FSFLA.org/~lxoliva/
> You must be the change you wish to see in the world. -- Gandhi
> Be Free! -- http://FSFLA.org/   FSF Latin America board member
> Free Software Evangelist|Red Hat Brasil GNU Toolchain Engineer


Re: Go patch committed: add escape analysis debugging

2016-08-04 Thread Ian Lance Taylor
On Thu, Aug 4, 2016 at 6:47 AM, Rainer Orth  
wrote:
>
>> This patch by Chris Manghane adds debugging to the escape analysis
>> code.  This debugging is designed to generate the same sort of output
>> as the gc Go compiler, for easier comparison of results.  Escape
>> analysis is still not enabled by default.  Bootstrapped and ran Go
>> testsuite on x86_64-pc-linux-gnu.  Committed to mainline.
>
> this patch (resp. this particular line)
>
>> Index: gcc/go/gofrontend/escape.cc
>> ===
>> --- gcc/go/gofrontend/escape.cc   (revision 238653)
>> +++ gcc/go/gofrontend/escape.cc   (working copy)
>> @@ -6,12 +6,14 @@
>>
>>  #include 
>>  #include 
>> +#include 
>
> broke Solaris bootstrap:
>
>  from /vol/gcc/src/hg/trunk/local/gcc/go/go-system.h:23,
>  from 
> /vol/gcc/src/hg/trunk/local/gcc/go/gofrontend/go-linemap.h:10,
>  from /vol/gcc/src/hg/trunk/local/gcc/go/gofrontend/gogo.h:10,
>  from 
> /vol/gcc/src/hg/trunk/local/gcc/go/gofrontend/escape.cc:11:
> ./auto-host.h:2214:0: error: "_FILE_OFFSET_BITS" redefined [-Werror]
>  #define _FILE_OFFSET_BITS 64
>
> In file included from 
> /var/gcc/regression/trunk/12-gcc-gas/build/prev-gcc/include-fixed/wchar.h:17:0,
>  from 
> /var/gcc/regression/trunk/12-gcc-gas/build/prev-i386-pc-solaris2.12/libstdc++-v3/include/cwchar:44,
>  from 
> /var/gcc/regression/trunk/12-gcc-gas/build/prev-i386-pc-solaris2.12/libstdc++-v3/include/bits/postypes.h:40,
>  from 
> /var/gcc/regression/trunk/12-gcc-gas/build/prev-i386-pc-solaris2.12/libstdc++-v3/include/iosfwd:40,
>  from 
> /var/gcc/regression/trunk/12-gcc-gas/build/prev-i386-pc-solaris2.12/libstdc++-v3/include/ios:38,
>  from 
> /var/gcc/regression/trunk/12-gcc-gas/build/prev-i386-pc-solaris2.12/libstdc++-v3/include/istream:38,
>  from 
> /var/gcc/regression/trunk/12-gcc-gas/build/prev-i386-pc-solaris2.12/libstdc++-v3/include/sstream:38,
>  from 
> /vol/gcc/src/hg/trunk/local/gcc/go/gofrontend/escape.cc:9:
> /var/gcc/regression/trunk/12-gcc-gas/build/prev-gcc/include-fixed/sys/feature_tests.h:223:0:
>  note: this is the location of the previous definition
>  #define _FILE_OFFSET_BITS 32
>
> Including anything before "config.h" (or in the case of Go "go-system.h")
> is fragile at best.
>
> The following patch allowed me to compile escape.cc again.

Thanks.  Committed.

Ian


Re: PING: new pass to warn on questionable uses of alloca() and VLAs

2016-08-04 Thread Jeff Law

On 07/27/2016 03:01 AM, Aldy Hernandez wrote:

Just in case this got lost in noise, since I know there was a lot of
back and forth between Martin Sebor and me.

This is the last iteration.

Tested on x86-64 Linux.

OK for trunk?

curr


gcc/

* Makefile.in (OBJS): Add gimple-ssa-warn-walloca.o.
* passes.def: Add two instances of pass_walloca.
* tree-pass.h (make_pass_walloca): New.
* gimple-ssa-warn-walloca.c: New file.
* doc/invoke.texi: Document -Walloca, -Walloca-larger-than=, and
-Wvla-larger-than= options.

gcc/c-family/

* c.opt (Walloca): New.
(Walloca-larger-than=): New.
(Wvla-larger-than=): New.
As someone already noted, it's gimple-ssa-warn-alloca, not 
gimple-ssa-warn-walloca for the ChangeLog entry.


On the nittish side, you're mixing C and C++ comment styles.  Choosing 
one and sticking with it seems better :-)





+@item -Walloca
+@opindex Wno-alloca
+@opindex Walloca
+This option warns on all uses of @code{alloca} in the source.
+
+@item -Walloca-larger-than=@var{n}
+This option warns on calls to @code{alloca} that are not bounded by a
+controlling predicate limiting its size to @var{n} bytes, or calls to
+@code{alloca} where the bound is unknown.
So for each of these little examples, I'd stuff the code into a trivial 
function definition and make "n" a parameter.  That way it's obvious the 
value of "n" comes from a context where we don't initially know its 
range, but we may be able to narrow the range due to statements in the 
function.


;

+
+class pass_walloca : public gimple_opt_pass
+{
+public:
+  pass_walloca (gcc::context *ctxt)
+: gimple_opt_pass(pass_data_walloca, ctxt), first_time_p (false)
+  {}
+  opt_pass *clone () { return new pass_walloca (m_ctxt); }
+  void set_pass_param (unsigned int n, bool param)
+{
+  gcc_assert (n == 0);
+  first_time_p = param;
+}
ISTM that you're using "first_time_p" here, but in passes.def you refer 
to this parameter as "strict_mode_p" in comments.


ie:

+  NEXT_PASS (pass_walloca, /*strict_mode_p=*/false);

I'd just drop the /*strict_mode_p*/ comment in both places it appears in 
your patch's change to passes.def.  I think we've generally frowned on 
those embedded comments, even though some have snuck in.



+
+// We have a few heuristics up our sleeve to determine if a call to
+// alloca() is within bounds.  Try them out and return the type of
+// alloca call this is based on its argument.
+//
+// Given a known argument (ARG) to alloca() and an EDGE (E)
+// calculating said argument, verify that the last statement in the BB
+// in E->SRC is a gate comparing ARG to an acceptable bound for
+// alloca().  See examples below.
+//
+// MAX_SIZE is WARN_ALLOCA= adjusted for VLAs.  It is the maximum size
+// in bytes we allow for arg.
+//
+// If the alloca bound is determined to be too large, ASSUMED_LIMIT is
+// set to the bound used to determine this.  ASSUMED_LIMIT is only set
+// for ALLOCA_BOUND_MAYBE_LARGE and ALLOCA_BOUND_DEFINITELY_LARGE.
+//
+// Returns the alloca type.
+
+static enum alloca_type
+alloca_call_type_by_arg (tree arg, edge e, unsigned max_size,
+wide_int *assumed_limit)
So I wonder if you ought to have a structure here for the return value 
which contains the alloca type and assumed limit.  I know in the past we 
avoided aggregate returns, but these days that doesn't seem necessary. 
Seems cleaner than having a return value and output parameters.



+{
+  // All the tests below depend on the jump being on the TRUE path.
+  if (!(e->flags & EDGE_TRUE_VALUE))
+return ALLOCA_UNBOUNDED;
Seems like a fairly arbitrary and undesirable limitation.  Couldn't the 
developer just have easily written


if (arg > N)
   x = malloc (...)
else
   x = alloca (...)

It also seems like you'd want to handle the set of LT/LE/GT/GE rather 
than just LE.  Or is it the case that we always canonicalize LT into LE 
by adjusting the constant (I vaguely remember running into that in RTL, 
so it's entirely possible and there'd likely be a canonicalization of 
GT/GE as well).


It also seems that once Andrew's infrastructure is in place this becomes 
dead code as we can just ask for the range at a point in the program, 
including for each incoming edge.  You might want a comment to that effect.






+
+  /* Check for:
+ if (arg .cond. LIMIT) -or- if (LIMIT .cond. arg)
+   alloca(arg);
+
+ Where LIMIT has a bound of unknown range.  */
+  tree limit = NULL;
+  if (gimple_cond_lhs (last) == arg)
+limit = gimple_cond_rhs (last);
+  else if (gimple_cond_rhs (last) == arg)
+limit = gimple_cond_lhs (last);
+  if (limit && TREE_CODE (limit) == SSA_NAME)
+{
+  wide_int min, max;
+  value_range_type range_type = get_range_info (limit, &min, &max);
+  if (range_type == VR_UNDEFINED || range_type == VR_VARYING)
+   return ALLOCA_BOUND_UNKNOWN;
+  // FIXME: We could try harder here and handle a possib

Re: [PATCH 1/4] Cherry-pick fprofile-generate-atomic from google/gcc-4_9 branch

2016-08-04 Thread Nathan Sidwell

On 08/04/16 11:34, Martin Liška wrote:

On 08/04/2016 04:48 PM, Nathan Sidwell wrote:

diff --git a/libgcc/libgcov-profiler.c b/libgcc/libgcov-profiler.c
+static inline void
+__gcov_one_value_profiler_body_atomic (gcov_type *counters, gcov_type value)
+{
...

The body looks to have data races.  Some kind of cmp_store needed on 
counters[1]?  Maybe it can't be completely race free?

nathan


You are right, as we would need to atomically change 2 values (counters[0] and 
counters[1]),
it's impossible IMHO. The question is what to do about that:

1) atomically update just counters[2] and live with data racing for the first 2 
values
2) add (probably conditionally) a spin lock
3) do not handle thread-safety of indirect call counters at all


Thanks for confirming my thoughts.

For this case there are three 'counters'
1) a value we're checking.  Set when the delta is zero.
2) a count of number of uses
3) a count on the delta of the uses that matched and the uses that did not.

Notice that the recorded value can change, whenever the delta returns to zero. 
That's intentional.  This has the side effect of preventing the delta ever going 
negative.


The tricky case is resetting the value when the delta is zero.  We can't 
simultaneously set the delta and the value.  We could use a 2 step process 
though -- set delta to 'updating', set value, set delta to 1. That will put a 
compare_exchange in the hot path though ... and still turns out to be tricky.


How about:
gcov_t expected;
atomic_load (&counter[0],  val, ...);
gcov_t delta = val == value ? 1 : -1;
atomic_add (&counter[1], delta);   <-- or atomic_add_fetch
if (delta < 0) {
  /* can we set counter[0]? */
  atomic_load (&counter[1], &expected, ...);
  if (expected < 0) {
atomic_store (&counter[0], value, ...);
atomic_add (&counter[1], 2, ...);
  }
}
atomic_add (&counter[2], 1, ...);

This does have a race condition -- two threads could get into the inner if body. 
 But I think that's harmless.  One of them will win  the store of value, and 
both of them will restore the delta counter.  We'll end up with delta being 1 
too high.


wdyt?

nathan


Re: Implement -Wimplicit-fallthrough (take 2): the rest

2016-08-04 Thread Jeff Law

On 08/04/2016 06:36 AM, Michael Matz wrote:

Hi,

On Wed, 27 Jul 2016, Marek Polacek wrote:


And this is the rest.  Either I just adjusted a falls through comment,
or I added __builtin_fallthrough ().  These were the cases where I was
fairly sure that the fall through is intentional.


I saw one case where I think the warning is a bit over-active:

@@ -42072,6 +42089,7 @@ rdseed_step:
 case IX86_BUILTIN_ADDCARRYX64:
   icode = CODE_FOR_addcarrydi;
   mode0 = DImode;
+  gcc_fallthrough ();

 handlecarry:
   arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in.  */

I.e. it also warns if the following label is not a case label but a normal
one.  I don't think this counts as a classical fall-through and it IMHO
should not be warned about nor should it be marked.
It's probably the same underlying issue I saw with a false-positive in 
one of the other patches.


jeff



Re: libgo patch committed: Update to 1.7rc3

2016-08-04 Thread Ian Lance Taylor
On Thu, Aug 4, 2016 at 1:11 AM, Uros Bizjak  wrote:
> On Thu, Aug 4, 2016 at 12:53 AM, Ian Lance Taylor  wrote:
>> On Thu, Jul 28, 2016 at 4:24 AM, Uros Bizjak  wrote:
>>>
>>> A new testsuite failure is introduced:
>>>
>>> FAIL: text/template
>>>
>>> on both, x86_64-linux-gnu and alpha-linux-gnu.
>>>
>>> The testcase corrupts stack with a too deep recursion.
>>>
>>> There is a part in libgo/go/text/template/exec.go that should handle
>>> this situation:
>>>
>>> // maxExecDepth specifies the maximum stack depth of templates within
>>> // templates. This limit is only practically reached by accidentally
>>> // recursive template invocations. This limit allows us to return
>>> // an error instead of triggering a stack overflow.
>>> const maxExecDepth = 100000
>>>
>>> but the limit is either set too high, or the error handling code is
>>> inefficient on both, split-stack (x86_64) and non-split-stack (alpha)
>>> targets. Lowering this value to 10000 "fixes" the testcase on both
>>> targets.
>>
>> I can not recreate this problem on x86 or x86_64.
>>
>> Does this patch work around the problem on Alpha?
>
> Yes, the patch "fixes" the problem on alpha, but I still see the
> failure on x86_64, even with the unlimited stack.

OK, I was able to recreate this by using GNU ld rather than gold.  I
have committed the appended patch to reduce the number of recursive
template invocations, since you said that 10000 did let the test pass
for you, and it works for me using GNU ld.  This number is still high
enough to not cut off any reasonable template execution.

For this patch bootstrapped and ran Go testsuite on
x86_64-pc-linux-gnu, using both GNU ld and gold.  Committed to
mainline.

Ian
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 239140)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-ca5b64137f013e3104fd74ee7d07ba556a501187
+235dffb0de1e99d6f521f052067f0e936bf63baa
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: libgo/go/text/template/exec.go
===
--- libgo/go/text/template/exec.go  (revision 238662)
+++ libgo/go/text/template/exec.go  (working copy)
@@ -19,7 +19,9 @@ import (
 // templates. This limit is only practically reached by accidentally
 // recursive template invocations. This limit allows us to return
 // an error instead of triggering a stack overflow.
-const maxExecDepth = 100000
+// For gccgo we make this 10000 rather than 100000 to avoid stack overflow
+// on non-split-stack systems.
+const maxExecDepth = 10000
 
 // state represents the state of an execution. It's not part of the
 // template so that multiple executions of the same template


Re: [PATCH] Fix wrong code on aarch64 due to paradoxical subreg

2016-08-04 Thread James Greenhalgh
On Wed, Aug 03, 2016 at 04:08:30PM -0600, Jeff Law wrote:
> On 08/03/2016 11:41 AM, Bernd Edlinger wrote:
> >On 08/03/16 17:38, Jeff Law wrote:
> >>cse.c changes look good, but I'd really like to see a testcase for each
> >>issue in the dejagnu framework.  Extra points if you tried to build a
> >>unit test using David M's framework, but that isn't required.
> >>
> >>The testcase from 70903 ought to be trivial to add to the dejagnu suite.
> >>  71779 might be more difficult, but if you could take a stab, it'd be
> >>appreciated.
> >>
> >
> >
> >Yes, sure.  I had assumed that the pr70903 test case is using some
> >target-specific vector types, but now I see that it even works as-is in
> >the gcc.c-torture/execute directory.
> >
> >So I've added the test case to the cse patch.  And quickly verified that
> >it works on x86_64-linux-gnu.
> >
> >
> >The pr71779 test case will be pretty difficult to reduce, because it
> >depends on combine to do the incorrect transformation and lra to spill
> >the subreg, and on the stack content at runtime to be non-zero.
> >
> >But technically it *is* already in the isl-test suite, so if isl is
> >in-tree, it is always executed by make check or make check-isl.
> >
> >It is just that gmp/mpfr/mpc and isl test results are not included by
> >contrib/test_summary, but that should be fixable.  What do you think?
> >
> >Actually that should not be too difficult, as there are test-suite.log
> >files that we could just add to the test_summary output as-is, for
> >instance:
> >
> >cat isl/test-suite.log
> >
> >==
> >isl 0.16.1: ./test-suite.log
> >==
> >
> ># TOTAL: 5
> ># PASS:  5
> ># SKIP:  0
> ># XFAIL: 0
> ># FAIL:  0
> ># XPASS: 0
> ># ERROR: 0
> >
> >.. contents:: :depth: 2
> >
> >
> >Are the patches OK now?
> Yes.  Thanks for taking care of this...
> 

Hi Bernd,

Thanks for fixing this, but it looks like you accidentally double-added
the pr70903.c testcase.

  Failures:
gcc.c-torture/execute/pr70903.c

  Bisected to:

  Author: edlinger 
  Date:   Thu Aug 4 13:20:57 2016 +0000

2016-08-04  Bernd Edlinger  

PR rtl-optimization/70903
* cse.c (cse_insn): If DEST is a paradoxical SUBREG, don't record 
DEST.

testsuite:
2016-08-04  Bernd Edlinger  

PR rtl-optimization/70903
* gcc.c-torture/execute/pr70903.c: New test.

  .../gcc/testsuite/gcc.c-torture/execute/pr70903.c:25:1: error: redefinition 
of 'foo'
  .../gcc/testsuite/gcc.c-torture/execute/pr70903.c:6:1: note: previous 
definition of 'foo' was here
  .../gcc/testsuite/gcc.c-torture/execute/pr70903.c:31:5: error: redefinition 
of 'main'
  .../gcc/testsuite/gcc.c-torture/execute/pr70903.c:12:5: note: previous 
definition of 'main' was here

I've fixed that up as so in revision 239142, I hope you agree the change
is obvious.

Thanks,
James

---
2016-08-04  James Greenhalgh  

* gcc.c-torture/execute/pr70903.c: Fix duplicate body.


diff --git a/gcc/testsuite/gcc.c-torture/execute/pr70903.c 
b/gcc/testsuite/gcc.c-torture/execute/pr70903.c
index 6ffd0aa..175ad1a 100644
--- a/gcc/testsuite/gcc.c-torture/execute/pr70903.c
+++ b/gcc/testsuite/gcc.c-torture/execute/pr70903.c
@@ -17,22 +17,4 @@ int main ()
 __builtin_abort();
   return 0;
 }
-typedef unsigned char V8 __attribute__ ((vector_size (32)));
-typedef unsigned int V32 __attribute__ ((vector_size (32)));
-typedef unsigned long long V64 __attribute__ ((vector_size (32)));
-
-static V32 __attribute__ ((noinline, noclone))
-foo (V64 x)
-{
-  V64 y = (V64)(V8){((V8)(V64){65535, x[0]})[1]};
-  return (V32){y[0], 255};
-}
 
-int main ()
-{
-  V32 x = foo ((V64){});
-//  __builtin_printf ("%08x %08x %08x %08x %08x %08x %08x %08x\n", x[0], x[1], 
x[2], x[3], x[4], x[5], x[6], x[7]);
-  if (x[1] != 255)
-__builtin_abort();
-  return 0;
-}




Re: [PATCH] gcov tool: Implement Hawick's algorithm for cycle detection, (PR gcov-profile/67992)

2016-08-04 Thread Nathan Sidwell

On 08/04/16 12:10, Martin Liška wrote:

On 08/04/2016 05:13 PM, Nathan Sidwell wrote:

On 08/04/16 10:42, Martin Liška wrote:


I decided to use a new enum, hope it's better?


that's fine.  But you know, if you set the enum values appropriately you could 
use the | trick rather than the compare you've done (c++ enum type safety would 
require an overloaded | operator though).  I don't mind either way,


Yeah, I decided to use enum + operator|.


You have a bug.  The enum values are {0,1,2}, so the result of meeting both 
regular and reversed loops will be the value '3', and so the check for == 
NEGATIVE_LOOP could erroneously fail.  Fixable by making NEGATIVE_LOOP's value 2 
+ LOOP (or many other variants on that theme).




+  if (w == start)
+   {
+ /* Cycle has been found.  */
+ result |= handle_cycle (path, count);
+   }

{...} not necessary here (even with the comment).


Re: Fix fir PR71696 in Libiberty Demangler (6)

2016-08-04 Thread Jeff Law

On 08/04/2016 01:07 AM, Marcel Böhme wrote:

Hi Jeff,


Can you take care of the minor issues above, retest & repost?


Sure. I removed the whitespace nits, used XDUPVEC instead of XNEWVEC+memcpy, 
and adjusted the growing heuristics of the new array proctypevec. The revised 
patch is attached below. Bootstrapped and regression tested on 
x86_64-pc-linux-gnu and checked PR71696 is resolved.


+   for (i = 0; i < work -> nproctypes; i++)
+ if (work -> proctypevec [i] == n)
+   success = 0;

So presumably this doesn't happen all that often or this could get expensive
and we'd want something more efficient for searching, right?


It seems, at least for the cases in the Demangler test suite, the loop never 
executes more than one iteration.






Index: libiberty/ChangeLog
===
--- libiberty/ChangeLog (revision 239112)
+++ libiberty/ChangeLog (working copy)
@@ -1,3 +1,20 @@
+2016-08-04  Marcel Böhme  
+
+   PR c++/71696
+   * cplus-dem.c: Prevent infinite recursion when there is a cycle
+   in the referencing of remembered mangled types.
+   (work_stuff): New stack to keep track of the remembered mangled
+   types that are currently being processed.
+   (push_processed_type): New method to push currently processed
+   remembered type onto the stack.
+   (pop_processed_type): New method to pop currently processed
+   remembered type from the stack.
+   (work_stuff_copy_to_from): Copy values of new variables.
+   (delete_non_B_K_work_stuff): Free stack memory.
+   (demangle_args): Push/Pop currently processed remembered type.
+   (do_type): Do not demangle a cyclic reference and push/pop
+   referenced remembered type.

Thanks.  Installed and BZ updated.

jeff



[PATCH] do not throw in std::make_exception_ptr

2016-08-04 Thread Gleb Natapov
Instead of throwing an exception allocate its memory and initialize it
explicitly. Makes std::make_exception_ptr more efficient since no stack
unwinding is needed.

In this version I hopefully addressed all of Jonathan's comments.

* libsupc++/exception (std::exception): Move...
* libsupc++/exception.h: ...here; New.
* libsupc++/cxxabi.h (__cxa_allocate_exception): Move...
* libsupc++/cxxabi_init_exception.h: ...here and add
__cxa_init_primary_exception; New.
* config/abi/pre/gnu-versioned-namespace.ver: add
__cxa_init_primary_exception and std::exception_ptr(void*)
* config/abi/pre/gnu.ver (CXXABI_1.3.11) : add
__cxa_init_primary_exception and std::exception_ptr(void*)
(CXXABI_1.3.11): New.
* include/Makefile.am: add exception.h and cxxabi_init_exception.h 
* include/Makefile.in: Likewise.
* libsupc++/Makefile.am: add exception.h and cxxabi_init_exception.h
* libsupc++/Makefile.in: Likewise.
* libsupc++/eh_throw.cc(__cxa_throw): add __cxa_init_primary_exception
and use it
* libsupc++/exception_ptr.h(std::make_exception_ptr): use
__cxa_allocate_exception and __cxa_init_primary_exception to create
exception pointer
* libsupc++/typeinfo: include bits/exception.h instead of exception


diff --git a/libstdc++-v3/config/abi/pre/gnu-versioned-namespace.ver 
b/libstdc++-v3/config/abi/pre/gnu-versioned-namespace.ver
index 8304dee..65866a3 100644
--- a/libstdc++-v3/config/abi/pre/gnu-versioned-namespace.ver
+++ b/libstdc++-v3/config/abi/pre/gnu-versioned-namespace.ver
@@ -179,6 +179,7 @@ CXXABI_2.0 {
 __cxa_free_exception;
 __cxa_free_dependent_exception;
 __cxa_get_exception_ptr;
+__cxa_init_primary_exception;
 __cxa_get_globals;
 __cxa_get_globals_fast;
 __cxa_guard_abort;
@@ -205,6 +206,7 @@ CXXABI_2.0 {
 # std::exception_ptr
 _ZNSt15__exception_ptr13exception_ptrC1Ev;
 _ZNSt15__exception_ptr13exception_ptrC2Ev;
+_ZNSt15__exception_ptr13exception_ptrC1EPv;
 _ZNSt15__exception_ptr13exception_ptrC1ERKS0_;
 _ZNSt15__exception_ptr13exception_ptrC2ERKS0_;
 _ZNSt15__exception_ptr13exception_ptrC1EMS0_FvvE;
diff --git a/libstdc++-v3/config/abi/pre/gnu.ver 
b/libstdc++-v3/config/abi/pre/gnu.ver
index b7f54e2..f51c6f9 100644
--- a/libstdc++-v3/config/abi/pre/gnu.ver
+++ b/libstdc++-v3/config/abi/pre/gnu.ver
@@ -2164,6 +2164,13 @@ CXXABI_1.3.10 {
 
 } CXXABI_1.3.9;
 
+CXXABI_1.3.11 {
+
+__cxa_init_primary_exception;
+_ZNSt15__exception_ptr13exception_ptrC1EPv;
+
+} CXXABI_1.3.10;
+
 # Symbols in the support library (libsupc++) supporting transactional memory.
 CXXABI_TM_1 {
 
diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am
index e2c4f63..b91453f 100644
--- a/libstdc++-v3/include/Makefile.am
+++ b/libstdc++-v3/include/Makefile.am
@@ -205,6 +205,8 @@ bits_sup_headers = \
${bits_sup_srcdir}/cxxabi_forced.h \
${bits_sup_srcdir}/exception_defines.h \
${bits_sup_srcdir}/exception_ptr.h \
+   ${bits_sup_srcdir}/exception.h \
+   ${bits_sup_srcdir}/cxxabi_init_exception.h \
${bits_sup_srcdir}/hash_bytes.h \
${bits_sup_srcdir}/nested_exception.h
 
diff --git a/libstdc++-v3/include/Makefile.in b/libstdc++-v3/include/Makefile.in
index 882ff14..1259ad4 100644
--- a/libstdc++-v3/include/Makefile.in
+++ b/libstdc++-v3/include/Makefile.in
@@ -495,6 +495,8 @@ bits_sup_headers = \
${bits_sup_srcdir}/cxxabi_forced.h \
${bits_sup_srcdir}/exception_defines.h \
${bits_sup_srcdir}/exception_ptr.h \
+   ${bits_sup_srcdir}/exception.h \
+   ${bits_sup_srcdir}/cxxabi_init_exception.h \
${bits_sup_srcdir}/hash_bytes.h \
${bits_sup_srcdir}/nested_exception.h
 
diff --git a/libstdc++-v3/libsupc++/Makefile.am 
b/libstdc++-v3/libsupc++/Makefile.am
index b45b5ae..ba4eac1 100644
--- a/libstdc++-v3/libsupc++/Makefile.am
+++ b/libstdc++-v3/libsupc++/Makefile.am
@@ -35,7 +35,7 @@ std_HEADERS = \
 
 bits_HEADERS = \
atomic_lockfree_defines.h cxxabi_forced.h \
-   exception_defines.h exception_ptr.h hash_bytes.h nested_exception.h
+   exception_defines.h exception_ptr.h hash_bytes.h nested_exception.h 
exception.h cxxabi_init_exception.h
 
 headers = $(std_HEADERS) $(bits_HEADERS)
 
diff --git a/libstdc++-v3/libsupc++/Makefile.in 
b/libstdc++-v3/libsupc++/Makefile.in
index f3648ac..3fb9d16 100644
--- a/libstdc++-v3/libsupc++/Makefile.in
+++ b/libstdc++-v3/libsupc++/Makefile.in
@@ -395,7 +395,7 @@ std_HEADERS = \
 
 bits_HEADERS = \
atomic_lockfree_defines.h cxxabi_forced.h \
-   exception_defines.h exception_ptr.h hash_bytes.h nested_exception.h
+   exception_defines.h exception_ptr.h hash_bytes.h nested_exception.h 
exception.h cxxabi_init_exception.h
 
 headers = $(std_HEADERS) $(bits_HEADERS)
 @GLIBCXX_HOSTED_TRUE@c_sources = \
diff --git a/libstdc++-v3/libsupc++/cxxabi.h b/libstdc++-v3/libsupc++/cxxabi.h
index 11ff9e5..f4b8f75 100644
--- a/libstdc++-v3/libsupc++/cxxabi.h
+++ b/libstdc++-v3/libsupc++/cxxabi.h
@@ -49,10 +49,7 @@
 #include 

[C++ Patch] PR 72800

2016-08-04 Thread Paolo Carlini

Hi,

when back in 2014 I sent the patch for c++/61088 I noticed some cases 
where we wanted to return early error_mark_node from add_capture to 
avoid ICEs during error recovery when COMPLETE_TYPE_P is used on an 
error_mark_node. The new testcase noticed one additional case, where the 
error_mark_node is returned by lambda_capture_field_type. I propose to 
just immediately check it. Alternately we could add the check to the 
else branch of the following conditional (which also passes testing) or 
somewhere else too, I guess. Tested x86_64-linux.


Thanks,

Paolo.



/cp
2016-08-04  Paolo Carlini  

PR c++/72800
* lambda.c (add_capture): Check lambda_capture_field_type return
value for error_mark_node.

/testsuite
2016-08-04  Paolo Carlini  

PR c++/72800
* g++.dg/cpp1y/lambda-ice1.C: New.
Index: cp/lambda.c
===
--- cp/lambda.c (revision 239118)
+++ cp/lambda.c (working copy)
@@ -492,6 +492,8 @@ add_capture (tree lambda, tree id, tree orig_init,
   else
 {
   type = lambda_capture_field_type (initializer, explicit_init_p);
+  if (type == error_mark_node)
+   return error_mark_node;
   if (by_reference_p)
{
  type = build_reference_type (type);
Index: testsuite/g++.dg/cpp1y/lambda-ice1.C
===
--- testsuite/g++.dg/cpp1y/lambda-ice1.C(revision 0)
+++ testsuite/g++.dg/cpp1y/lambda-ice1.C(working copy)
@@ -0,0 +1,7 @@
+// PR c++/72800
+// { dg-do compile { target c++14 } }
+
+void foo ()
+{
+  [n {}] {};  // { dg-error "one element|deducing" }
+}


Re: Use correct location information for OpenACC shape and simple clauses in C/C++

2016-08-04 Thread Jeff Law

On 07/27/2016 09:17 AM, Thomas Schwinge wrote:

Hi!

I found that for a lot of OpenACC (and potentially also OpenMP) clauses
(in C/C++ front ends; didn't look at Fortran), we use wrong location
information.  The problem is that
c_parser_oacc_all_clauses/c_parser_omp_all_clauses calls
cp_parser_omp_clause_name to determine the pragma_omp_clause c_kind, and
that function (as documented) consumes the clause token before returning.
So, when we then do "c_parser_peek_token (parser)->location" or similar
in some clause parsing function, that will return the location
information of the token _after_ the clause token, which -- at least very
often -- is not desirable, in particular if that location information is
used then in a build_omp_clause call, which should point to the clause
token itself, and not whatever follows after that.

Probably that all went unnoticed for so long, because the GCC testsuite
largely is running with -fno-diagnostics-show-caret, so we don't visually
see the wrong location information (and nobody pays attention to the
column information as given, for example, as line 2, column 32 in
"[...]/c-c++-common/goacc/routine-2.c:2:32: error: [...]".

There seems to be a lot of inconsistency in that in all the clause
parsing; here is just a first patch to fix the immediate problem I've
been observing.  OK for trunk already, or need to clean this all up in
one go?  Do we need this on release branches, as a "quality of
implementation" fix (wrong diagnostic locations)?

commit bac4c04ca1d52c56a3583f5958e116c62b889d5a
Author: Thomas Schwinge 
Date:   Wed Jul 27 16:55:56 2016 +0200

Use correct location information for OpenACC shape and simple clauses in 
C/C++

gcc/c/
* c-parser.c (c_parser_oacc_shape_clause)
(c_parser_oacc_simple_clause): Add loc formal parameter.  Adjust
all users.
gcc/cp/
* parser.c (cp_parser_oacc_shape_clause): Add loc formal
parameter.  Adjust all users.
---
 gcc/c/c-parser.c | 25 +
 gcc/cp/parser.c  | 12 +++-
 2 files changed, 20 insertions(+), 17 deletions(-)

diff --git gcc/c/c-parser.c gcc/c/c-parser.c
index 0031481..82ac855 100644
--- gcc/c/c-parser.c
+++ gcc/c/c-parser.c
@@ -11758,12 +11758,12 @@ c_parser_oacc_shape_clause (c_parser *parser, 
omp_clause_code kind,
seq */

 static tree
-c_parser_oacc_simple_clause (c_parser *parser, enum omp_clause_code code,
-tree list)
+c_parser_oacc_simple_clause (c_parser * /* parser */, location_t loc,
+enum omp_clause_code code, tree list)
Any reason not to just drop the parser argument entirely?  If we must 
have it to match an API, but don't need it, then just drop the argument 
name entirely rather than commenting it out.  This kind of comment, IMHO 
serves no useful purpose.


With that change and some tests (presumably using David's recipe) this 
will be fine.


jeff



Re: [PATCH 1/4] Cherry-pick fprofile-generate-atomic from google/gcc-4_9 branch

2016-08-04 Thread Nathan Sidwell

On 08/04/16 12:43, Nathan Sidwell wrote:


How about:
gcov_t expected;
atomic_load (&counter[0],  val, ...);
gcov_t delta = val == value ? 1 : -1;
atomic_add (&counter[1], delta);   <-- or atomic_add_fetch
if (delta < 0) {
  /* can we set counter[0]? */
  atomic_load (&counter[1], &expected, ...);
  if (expected < 0) {
atomic_store (&counter[0], value, ...);
atomic_add (&counter[1], 2, ...);
  }
}
atomic_add (&counter[2], 1, ...);


we could do better by using compare_exchange storing value, and detect the race 
I mentioned:


gcov_t expected, val;
atomic_load (&counter[0],  &val, ...);
gcov_t delta = val == value ? 1 : -1;
atomic_add (&counter[1], delta);
if (delta < 0) {
   retry:
/* can we set counter[0]? */
atomic_load (&counter[1], &expected, ...);
if (expected < 0) {
  bool stored = atomic_compare_exchange (&counter[0], &val, &value, ...);
  if (!stored && val != value)
goto retry;
  atomic_add (&counter[1], 2, ...);
  }
}
atomic_add (&counter[2], 1, ...);

This corrects the off-by-one issue.

nathan


Re: [PATCH, rs6000] Switch the rs6000 port over to LRA

2016-08-04 Thread Peter Bergner

On 8/3/16 6:03 PM, David Edelsohn wrote:

Please open a Bugzilla for the rs6000 backend about the vsx-timode
performance regression.  The vsx-timode regression needs to be fixed
for GCC 7.


Ok, I opened https://gcc.gnu.org/PR72804 and will start debugging
the problem.

Peter




Re: Implement -Wimplicit-fallthrough (take 2): fix missing breaks

2016-08-04 Thread Ian Lance Taylor
On Thu, Jul 28, 2016 at 8:57 AM, Marek Polacek  wrote:
> On Wed, Jul 27, 2016 at 10:05:25AM -0700, Mike Stump wrote:
>> On Jul 27, 2016, at 9:52 AM, Marek Polacek  wrote:
>> >
>> > This is what the new warning pointed out.  I think all these are bugs.
>> >
>> > --- gcc/libgo/runtime/heapdump.c
>> > +++ gcc/libgo/runtime/heapdump.c
>> > @@ -766,6 +766,7 @@ dumpefacetypes(void *obj __attribute__ ((unused)), 
>> > uintptr size, const Type *typ
>> > for(i = 0; i <= size - type->__size; i += type->__size)
>> > //playgcprog(i, (uintptr*)type->gc + 1, 
>> > dumpeface_callback, obj);
>> > break;
>> > +   break;
>> > case TypeInfo_Chan:
>> > if(type->__size == 0) // channels may have zero-sized objects 
>> > in them
>> > break;
>>
>> I disagree that's the best fix.  Better would be to uncomment out the 
>> playgcprog calls, and #if 0 the entire contents of the function.
>
> You're right -- I only looked at the particular switch case, not the entire
> function.  I did as you suggested.  Ian, do you want to take care of this?

Thanks for pointing it out.  I committed this patch to fix the problem.

For the patch bootstrapped and ran Go testsuite on x86_64-pc-linux-gnu.

Ian
Index: gcc/go/gofrontend/MERGE
===
--- gcc/go/gofrontend/MERGE (revision 239141)
+++ gcc/go/gofrontend/MERGE (working copy)
@@ -1,4 +1,4 @@
-235dffb0de1e99d6f521f052067f0e936bf63baa
+ae44ca35b0b1c2ab925cadbcd7d47b334be5a318
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
Index: libgo/runtime/heapdump.c
===
--- libgo/runtime/heapdump.c(revision 238653)
+++ libgo/runtime/heapdump.c(working copy)
@@ -763,14 +763,16 @@ dumpefacetypes(void *obj __attribute__ (
//playgcprog(0, (uintptr*)type->gc + 1, dumpeface_callback, 
obj);
break;
case TypeInfo_Array:
-   for(i = 0; i <= size - type->__size; i += type->__size)
+   for(i = 0; i <= size - type->__size; i += type->__size) {
//playgcprog(i, (uintptr*)type->gc + 1, 
dumpeface_callback, obj);
+   }
break;
case TypeInfo_Chan:
if(type->__size == 0) // channels may have zero-sized objects 
in them
break;
-   for(i = runtime_Hchansize; i <= size - type->__size; i += 
type->__size)
+   for(i = runtime_Hchansize; i <= size - type->__size; i += 
type->__size) {
//playgcprog(i, (uintptr*)type->gc + 1, 
dumpeface_callback, obj);
+   }
break;
}
 }


Re: [PATCH 1/3] (v2) On-demand locations within string-literals

2016-08-04 Thread Jeff Law

On 08/04/2016 08:27 AM, David Malcolm wrote:


As for test coverage, v2 and v3 of the kit add over a thousand lines of
selftest code that heavily exercise string lexing, using the
 line_table_case machinery to run the tests with various interesting
boundary conditions with line_table (e.g. near
 LINE_MAP_MAX_LOCATION_WITH_PACKED_RANGES).

In terms of test coverage of the fallbacks, patch 2 of v3 of the kit
directly exercises the substr_loc.get_range in
gcc.dg/plugin/diagnostic_plugin_test_string_literals.c via
gcc.dg/plugin/diagnostic-test-string-literals-1.c, and some of the
tests there cover the failures, via:

  error_at (strloc, "unable to read substring range: %s", err);

which we wouldn't do in a normal diagnostic (but which is appropriate
for testing the machinery itself).

Patch 3 of the v3 kit adds a format_warning_va function to c-format.c
which is responsible for dealing with failures:
https://gcc.gnu.org/ml/gcc-patches/2016-08/msg00204.html
Thanks for pointing this out.  I hadn't started looking at the meat of 
the on-demand locations until this morning.





Looking at patch 3, there's a fair amount of end-to-end testing in
 gcc.dg/format/diagnostic-ranges.c but it looks like I forgot to add an
end-to-end test there of failure due to stringification; I can add one.
 Is the rest of the v3 patch kit reviewable?
Absolutely.  I wasn't trying to imply that it wasn't  -- in fact most of 
it is self-approvable stuff and I've only got a couple questions about 
the rest.


Jeff



Re: [PATCH 2/4] (v3) On-demand locations within string-literals

2016-08-04 Thread Jeff Law

On 08/03/2016 09:45 AM, David Malcolm wrote:

Changes in v3:
- Avoid including cpplib.h from input.h
- Properly handle stringified macro arguments (with tests for this)
- Minor whitespace fixes
- Move selftest.h changes to a separate patch

Changes in v2:
- Tweaks to substring location selftests
- Many more selftests (EBCDIC, the various wide string types, etc)
- Clean up conditions in charset.c; require source == execution charset
  to have substring locations
- Make string_concat_db field private
- Return error messages rather than bool
- Fix source_range for charset.c:convert_escape
- Introduce class substring_loc
- Handle bad input locations more gracefully
- Ensure that we can read substring information for a token which
  starts in one linemap and ends in another (seen in
  gcc.dg/cpp/pr69985.c)

This version addresses Joseph's qn about stringification of macro
arguments (by failing gracefully on them), and the modularity
concerns noted by Manu.

Successfully bootstrapped&regrtested in conjunction with the rest of the
patch kit on x86_64-pc-linux-gnu.

v2 of the kit successfully passes a full config-list.mk and a successful 
selftest
run for stage 1 on powerpc-ibm-aix7.1.3.0 (gcc111), both in conjunction with the
rest of the patch kit; I plan to repeat those tests.

I believe I can self-approve the changes to input.c, input.h, libcpp,
and the testsuite; the remaining changes needing approval are those
to c-family and to gcc.c.
I think that's a fair assessment.  You might consider pulling those out 
as a distinct hunk in the future -- if you haven't noticed, I often try 
to knock out the smaller patches first (without even looking to see how 
much might be bits the author can self-approve).





OK for trunk if it passes testing? (by itself)


gcc/c-family/ChangeLog:
* c-common.c: Include "substring-locations.h".
(get_cpp_ttype_from_string_type): New function.
(g_string_concat_db): New global.
(substring_loc::get_range): New method.
* c-common.h (g_string_concat_db): New declaration.
(class substring_loc): New class.
* c-lex.c (lex_string): When concatenating strings, capture the
locations of all tokens using a new obstack, and record the
concatenation locations within g_string_concat_db.
* c-opts.c (c_common_init_options): Construct g_string_concat_db
on the ggc-heap.

gcc/ChangeLog:
* gcc.c (cpp_options): Rename string to...
(cpp_options_): ...this, to avoid clashing with struct in
cpplib.h.
(static_specs): Update initialize for above renaming
* input.c (string_concat::string_concat): New constructor.
(string_concat_db::string_concat_db): New constructor.
(string_concat_db::record_string_concatenation): New method.
(string_concat_db::get_string_concatenation): New method.
(string_concat_db::get_key_loc): New method.
(class auto_cpp_string_vec): New class.
(get_substring_ranges_for_loc): New function.
(get_source_range_for_substring): New function.
(get_num_source_ranges_for_substring): New function.
(class selftest::lexer_test_options): New class.
(struct selftest::lexer_test): New struct.
(class selftest::ebcdic_execution_charset): New class.
(selftest::ebcdic_execution_charset::s_singleton): New variable.
(selftest::lexer_test::lexer_test): New constructor.
(selftest::lexer_test::~lexer_test): New destructor.
(selftest::lexer_test::get_token): New method.
(selftest::assert_char_at_range): New function.
(ASSERT_CHAR_AT_RANGE): New macro.
(selftest::assert_num_substring_ranges): New function.
(ASSERT_NUM_SUBSTRING_RANGES): New macro.
(selftest::assert_has_no_substring_ranges): New function.
(ASSERT_HAS_NO_SUBSTRING_RANGES): New macro.
(selftest::test_lexer_string_locations_simple): New function.
(selftest::test_lexer_string_locations_ebcdic): New function.
(selftest::test_lexer_string_locations_hex): New function.
(selftest::test_lexer_string_locations_oct): New function.
(selftest::test_lexer_string_locations_letter_escape_1): New function.
(selftest::test_lexer_string_locations_letter_escape_2): New function.
(selftest::test_lexer_string_locations_ucn4): New function.
(selftest::test_lexer_string_locations_ucn8): New function.
(selftest::uint32_from_big_endian): New function.
(selftest::test_lexer_string_locations_wide_string): New function.
(selftest::uint16_from_big_endian): New function.
(selftest::test_lexer_string_locations_string16): New function.
(selftest::test_lexer_string_locations_string32): New function.
(selftest::test_lexer_string_locations_u8): New function.
(selftest::test_lexer_string_locations_utf8_source): New function.
(selftest::test_lexer_string_locations_concat

Re: [PATCH] do not throw in std::make_exception_ptr

2016-08-04 Thread Jonathan Wakely

On 04/08/16 20:01 +0300, Gleb Natapov wrote:

Instead of throwing an exception allocate its memory and initialize it
explicitly. Makes std::make_exception_ptr more efficient since no stack
unwinding is needed.

In this version I hopefully addressed all Jonathan comments.

* libsupc++/exception (std::exception): Move...
* libsupc++/exception.h: ...here; New.
* libsupc++/cxxabi.h (__cxa_allocate_exception): Move...
* libsupc++/cxxabi_init_exception.h: ...here and add
__cxa_init_primary_exception; New.
* config/abi/pre/gnu-versioned-namespace.ver: add
__cxa_init_primary_exception and std::exception_ptr(void*)
* config/abi/pre/gnu.ver (CXXABI_1.3.11) : add
__cxa_init_primary_exception and std::exception_ptr(void*)
(CXXABI_1.3.11): New.
* include/Makefile.am: add exception.h and cxxabi_init_exception.h
* include/Makefile.in: Likewise.
* libsupc++/Makefile.am: add exception.h and cxxabi_init_exception.h
* libsupc++/Makefile.in: Likewise.
* libsupc++/eh_throw.cc(__cxa_throw): add __cxa_init_primary_exception
and use it
* libsupc++/exception_ptr.h(std::make_exception_ptr): use
__cxa_allocate_exception and __cxa_init_primary_exception to create
exception pointer
* libsupc++/typeinfo: include bits/exception.h instead of exception


This version is *much* easier to review :-)


+namespace __cxxabiv1
+{
+  struct __cxa_refcounted_exception;
+
+  extern "C"
+{
+  // Allocate memory for the primary exception plus the thrown object.
+  void*
+__cxa_allocate_exception(size_t) _GLIBCXX_NOTHROW;


Please align __cxa_allocate_exception with the return type on the
previous line.


+
+  // Initialize exception


Please add "(this is a GNU extension)" to the comment.


+  __cxa_refcounted_exception*
+  __cxa_init_primary_exception(void *object, std::type_info *tinfo,
+void (_GLIBCXX_CDTOR_CALLABI *dest) (void *)) _GLIBCXX_NOTHROW;
+
+}
+} // namespace __cxxabiv1
+
+#endif
+
+#pragma GCC visibility pop
+
+#endif // _CXXABI_INIT_EXCEPTION_H
index 63631f6..8be903b 100644
--- a/libstdc++-v3/libsupc++/exception
+++ b/libstdc++-v3/libsupc++/exception
@@ -36,39 +36,12 @@

#include 
#include 
+#include 

extern "C++" {

namespace std
{
-  /**
-   * @defgroup exceptions Exceptions
-   * @ingroup diagnostics
-   *
-   * Classes and functions for reporting errors via exception classes.
-   * @{
-   */


The Doxygen group that starts with @{ ends later in this file, but you
haven't moved the corresponding @} to the new file.

I can take care of fixing that up though (we want the contents of the
new file and this one to all be in the group).



@@ -162,8 +169,12 @@ namespace std
swap(exception_ptr& __lhs, exception_ptr& __rhs)
{ __lhs.swap(__rhs); }

-  } // namespace __exception_ptr
+template<typename _Ex>
+  static inline void
+  __dest_thunk(void* x)
+  { reinterpret_cast<_Ex*>(x)->~_Ex(); }


This is still 'static' so we get a separate instantiation in every
translation unit that uses make_exception_ptr. It should be 'inline'
but not 'static'.

Looking at it again now, is there any reason to use reinterpret_cast
rather than static_cast? I think they're identical in this context,
and I prefer to only use reinterpret_cast as a last resort.

Other than that, I think this is good to commit.

I can't think of any way to test the changes beyond the existing tests
for exception_ptr, so we don't need new test files.




[PATCH] Define std::is_callable and std::is_nothrow_callable

2016-08-04 Thread Jonathan Wakely

Two new traits for C++17, as well as identical __is_callable and
__is_nothrow_callable traits defined for C++11 mode (so I can use the
latter to add an exception specification to std::__invoke).

* doc/xml/manual/status_cxx2017.xml: Update status table.
* include/std/functional (__inv_unwrap): Move to <type_traits>.
(__invoke_impl): Remove exception specifications.
(__invoke, invoke): Add exception specifications using
__is_nothrow_callable.
* include/std/type_traits (__inv_unwrap): Move from <functional>.
(__is_callable_impl, __call_is_nt, __call_is_nothrow): New helpers.
(__is_callable, __is_nothrow_callable): New traits.
(is_callable, is_callable_v): New C++17 traits.
(is_nothrow_callable, is_nothrow_callable_v): Likewise.
* testsuite/20_util/is_callable/requirements/
explicit_instantiation.cc: New test.
* testsuite/20_util/is_callable/requirements/
explicit_instantiation_ext.cc: New test.
* testsuite/20_util/is_callable/requirements/typedefs.cc: New test.
* testsuite/20_util/is_callable/requirements/typedefs_ext.cc: New
test.
* testsuite/20_util/is_callable/value.cc: New test.
* testsuite/20_util/is_callable/value_ext.cc: New test.
* testsuite/20_util/is_nothrow_callable/requirements/
explicit_instantiation.cc: New test.
* testsuite/20_util/is_nothrow_callable/requirements/
explicit_instantiation_ext.cc: New test.
* testsuite/20_util/is_nothrow_callable/requirements/typedefs.cc:
New test.
* testsuite/20_util/is_nothrow_callable/requirements/typedefs_ext.cc:
New test.
* testsuite/20_util/is_nothrow_callable/value.cc: New test.
* testsuite/20_util/is_nothrow_callable/value_ext.cc: New test.

Tested x86_64-linux, committed to trunk.


commit 8f42033a7c8f66b5a8abd05758f176e3cb3c2933
Author: Jonathan Wakely 
Date:   Thu Aug 4 01:48:56 2016 +0100

Define std::is_callable and std::is_nothrow_callable

* doc/xml/manual/status_cxx2017.xml: Update status table.
* include/std/functional (__inv_unwrap): Move to <type_traits>.
(__invoke_impl): Remove exception specifications.
(__invoke, invoke): Add exception specifications using
__is_nothrow_callable.
* include/std/type_traits (__inv_unwrap): Move from <functional>.
(__is_callable_impl, __call_is_nt, __call_is_nothrow): New helpers.
(__is_callable, __is_nothrow_callable): New traits.
(is_callable, is_callable_v): New C++17 traits.
(is_nothrow_callable, is_nothrow_callable_v): Likewise.
* testsuite/20_util/is_callable/requirements/
explicit_instantiation.cc: New test.
* testsuite/20_util/is_callable/requirements/
explicit_instantiation_ext.cc: New test.
* testsuite/20_util/is_callable/requirements/typedefs.cc: New test.
* testsuite/20_util/is_callable/requirements/typedefs_ext.cc: New
test.
* testsuite/20_util/is_callable/value.cc: New test.
* testsuite/20_util/is_callable/value_ext.cc: New test.
* testsuite/20_util/is_nothrow_callable/requirements/
explicit_instantiation.cc: New test.
* testsuite/20_util/is_nothrow_callable/requirements/
explicit_instantiation_ext.cc: New test.
* testsuite/20_util/is_nothrow_callable/requirements/typedefs.cc:
New test.
* testsuite/20_util/is_nothrow_callable/requirements/typedefs_ext.cc:
New test.
* testsuite/20_util/is_nothrow_callable/value.cc: New test.
* testsuite/20_util/is_nothrow_callable/value_ext.cc: New test.

diff --git a/libstdc++-v3/doc/xml/manual/status_cxx2017.xml 
b/libstdc++-v3/doc/xml/manual/status_cxx2017.xml
index d32399d..8391758 100644
--- a/libstdc++-v3/doc/xml/manual/status_cxx2017.xml
+++ b/libstdc++-v3/doc/xml/manual/status_cxx2017.xml
@@ -402,14 +402,13 @@ Feature-testing recommendations for C++.
 
 
 
-  
is_callable, the missing INVOKE related 
trait
   
http://www.w3.org/1999/xlink"; 
xlink:href="http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0077r2.html";>
P0077R2

   
-   No 
+   7 
__cpp_lib_is_callable >= 201603 
 
 
diff --git a/libstdc++-v3/include/std/functional 
b/libstdc++-v3/include/std/functional
index d635ef5..843dc83 100644
--- a/libstdc++-v3/include/std/functional
+++ b/libstdc++-v3/include/std/functional
@@ -184,18 +184,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 : _Weak_result_type_impl::type>
 { };
 
-  template::type>
-struct __inv_unwrap
-{
-  using type = _Tp;
-};
-
-  template
-struct __inv_unwrap<_Tp, reference_wrapper<_Up>>
-{
-  using type = _Up&;
-};
-
   // Used by __invoke_impl instead of std::forward<_Tp> so that a
   // reference_wrapper is converted to an lvalue-reference.
   template::type>
@@ -206,23 +194,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VER

[PATCH, Fortran] New flag -finit-derived to initialize components of derived types

2016-08-04 Thread Fritz Reese
All,

With many other compilers, local variables are automatically
initialized to zero (or some other user-specified value) by default.
GNU Fortran allows this with the options -finit-local-zero,
-finit-real=, -finit-integer=, etc... However several other compilers
also initialize structure variables (components of derived type
variables), a feature which GNU Fortran does not provide. This would
be a useful feature, and (unfortunately) some legacy code even relies
on automatic initialization of structures.

To increase usability and compatibility I have thus a patch which
introduces a new flag -finit-derived into GNU Fortran, allowing
initialization of automatic derived-type and structure variables. With
the patch GNU Fortran generates initializers for structure/derived
type components as if they were local variables of the same type,
according to the other initialization flags (-finit-local-zero,
-finit-real=, -finit-integer=, etc...).

The bulk of the patch includes refactoring some common behaviors in
the existing functions in resolve.c (build_default_init_expr) and
decl.c (build_struct) by placing them in new functions in expr.c
(gfc_build_default_init_expr, gfc_apply_init). The crux of the patch
is a fairly simple tweak to expr.c (gfc_default_initializer), now in
the new functions (component_initializer, gfc_generate_initializer),
which generates initializers for components that do not have them,
when the time is right. Please review and let me know if there are
questions or comments.

Bootstraps and passes all tests (including the several shipped with
it) on x86_64-redhat-linux. If it is ok for trunk I will commit.

---
Fritz Reese

2016-08-04  Fritz Reese  

gcc/fortran/
* lang.opt, invoke.texi: New flag -finit-derived.
* gfortran.h (gfc_build_default_init_expr, gfc_apply_init,
gfc_generate_initializer): New prototypes.
* expr.c (gfc_build_default_init_expr, gfc_apply_init,
component_initializer, gfc_generate_initializer): New functions.
* expr.c (gfc_default_initializer): Wrap gfc_generate_initializer.
* decl.c (build_struct): Move common code to gfc_apply_init.
* resolve.c (can_generate_init): New function.
* resolve.c (build_default_init_expr): Wrap gfc_build_default_init_expr.
* resolve.c (apply_default_init, resolve_fl_variable_derived): Use
gfc_generate_initializer.
* trans-decl.c (gfc_generate_function_code): Use
gfc_generate_initializer.

gcc/testsuite/gfortran.dg/
* init_flag_13.f90: New testcase.
* init_flag_14.f90: Ditto.
* init_flag_15.f03: Ditto.
* dec_init_1.f90: Ditto.
* dec_init_2.f90: Ditto.
diff --git a/gcc/fortran/decl.c b/gcc/fortran/decl.c
index 818e7d4..80af17c 100644
--- a/gcc/fortran/decl.c
+++ b/gcc/fortran/decl.c
@@ -1910,53 +1910,7 @@ build_struct (const char *name, gfc_charlen *cl, gfc_expr **init,
 }
   *as = NULL;
 
-  /* Should this ever get more complicated, combine with similar section
- in add_init_expr_to_sym into a separate function.  */
-  if (c->ts.type == BT_CHARACTER && !c->attr.pointer && c->initializer
-  && c->ts.u.cl
-  && c->ts.u.cl->length && c->ts.u.cl->length->expr_type == EXPR_CONSTANT)
-{
-  int len;
-
-  gcc_assert (c->ts.u.cl && c->ts.u.cl->length);
-  gcc_assert (c->ts.u.cl->length->expr_type == EXPR_CONSTANT);
-  gcc_assert (c->ts.u.cl->length->ts.type == BT_INTEGER);
-
-  len = mpz_get_si (c->ts.u.cl->length->value.integer);
-
-  if (c->initializer->expr_type == EXPR_CONSTANT)
-	gfc_set_constant_character_len (len, c->initializer, -1);
-  else if (c->initializer
-		&& c->initializer->ts.u.cl
-		&& mpz_cmp (c->ts.u.cl->length->value.integer,
-			c->initializer->ts.u.cl->length->value.integer))
-	{
-	  gfc_constructor *ctor;
-	  ctor = gfc_constructor_first (c->initializer->value.constructor);
-
-	  if (ctor)
-	{
-	  int first_len;
-	  bool has_ts = (c->initializer->ts.u.cl
-			 && c->initializer->ts.u.cl->length_from_typespec);
-
-	  /* Remember the length of the first element for checking
-		 that all elements *in the constructor* have the same
-		 length.  This need not be the length of the LHS!  */
-	  gcc_assert (ctor->expr->expr_type == EXPR_CONSTANT);
-	  gcc_assert (ctor->expr->ts.type == BT_CHARACTER);
-	  first_len = ctor->expr->value.character.length;
-
-	  for ( ; ctor; ctor = gfc_constructor_next (ctor))
-		if (ctor->expr->expr_type == EXPR_CONSTANT)
-		{
-		  gfc_set_constant_character_len (len, ctor->expr,
-		  has_ts ? -1 : first_len);
-		  ctor->expr->ts.u.cl->length = gfc_copy_expr (c->ts.u.cl->length);
-		}
-	}
-	}
-}
+  gfc_apply_init (&c->ts, &c->attr, c->initializer);
 
   /* Check array components.  */
   if (!c->attr.dimension)
diff --git a/gcc/fortran/expr.c b/gcc/fortran/expr.c
index 6d0eb22..8e2b892 100644
--- a/gcc/fortran/expr.c
+++ b/gcc/fortran/expr.c
@@ -391

  1   2   >