[committed] Fix up gcc.dg/tree-ssa/pr55579.c test (PR debug/56307)

2013-03-15 Thread Jakub Jelinek
Hi!

As this supposedly affects all targets that don't default to DWARF debug
info, I've committed this fix as obvious to force using
-fvar-tracking-assignments everywhere.

Tested on x86_64-linux and with cross to hppa2.0w-hp-hpux11.11.

2013-03-15  Jakub Jelinek  

PR debug/56307
* gcc.dg/tree-ssa/pr55579.c: Add -fvar-tracking-assignments to
dg-options.  Remove 32-bit hppa*-*-hpux* xfail.

--- gcc/testsuite/gcc.dg/tree-ssa/pr55579.c.jj  2013-03-12 09:59:36.0 
+0100
+++ gcc/testsuite/gcc.dg/tree-ssa/pr55579.c 2013-03-15 08:03:17.056166515 
+0100
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -g -fdump-tree-esra" } */
+/* { dg-options "-O2 -g -fdump-tree-esra -fvar-tracking-assignments" } */
 
 struct S { int a; char b; char c; short d; };
 
@@ -11,6 +11,5 @@ foo (int x)
   return x;
 }
 
-/* Test fails on 32-bit hppa*-*-hpux*.  See PR debug/56307.  */
-/* { dg-final { scan-tree-dump "Created a debug-only replacement for s" "esra" 
{ xfail { hppa*-*-hpux* && { ! lp64 } } } } } */
+/* { dg-final { scan-tree-dump "Created a debug-only replacement for s" "esra" 
} } */
 /* { dg-final { cleanup-tree-dump "esra" } } */

Jakub


[Patch, Fortran] PR56615 - Wrong-code with TRANSFER of noncontiguous arrays

2013-03-15 Thread Tobias Burnus
The issue is a regression which exists since GCC 4.4. The fix is rather 
obvious (see also PR).


Built and regtested on x86-64-gnu-linux.
OK for the trunk and the two maintained branches, 4.6 and 4.7?

Tobias
2013-03-15  Tobias Burnus  

	PR fortran/56615
	* trans-intrinsic.c (gfc_conv_intrinsic_transfer): Pack arrays
	if they are not simply contiguous.

2013-03-15  Tobias Burnus  

	PR fortran/56615
	* gfortran.dg/transfer_intrinsic_5.f90: New.

diff --git a/gcc/fortran/trans-intrinsic.c b/gcc/fortran/trans-intrinsic.c
index 83e3acf..7905503 100644
--- a/gcc/fortran/trans-intrinsic.c
+++ b/gcc/fortran/trans-intrinsic.c
@@ -5435,9 +5435,8 @@ gfc_conv_intrinsic_transfer (gfc_se * se, gfc_expr * expr)
   source = gfc_conv_descriptor_data_get (argse.expr);
   source_type = gfc_get_element_type (TREE_TYPE (argse.expr));
 
-  /* Repack the source if not a full variable array.  */
-  if (arg->expr->expr_type == EXPR_VARIABLE
-	  && arg->expr->ref->u.ar.type != AR_FULL)
+  /* Repack the source if not simply contiguous.  */
+  if (!gfc_is_simply_contiguous (arg->expr, false))
 	{
 	  tmp = gfc_build_addr_expr (NULL_TREE, argse.expr);
 
diff --git a/gcc/testsuite/gfortran.dg/transfer_intrinsic_5.f90 b/gcc/testsuite/gfortran.dg/transfer_intrinsic_5.f90
new file mode 100644
index 000..47be585
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/transfer_intrinsic_5.f90
@@ -0,0 +1,50 @@
+! { dg-do run }
+!
+! PR fortran/56615
+!
+! Contributed by  Harald Anlauf
+!
+!
+program gfcbug
+  implicit none
+  integer, parameter :: n = 8
+  integer:: i
+  character(len=1), dimension(n) :: a, b
+  character(len=n)   :: s, t
+  character(len=n/2) :: u
+
+  do i = 1, n
+ a(i) = achar (i-1 + iachar("a"))
+  end do
+!  print *, "# Forward:"
+!  print *, "a=", a
+  s = transfer (a, s)
+!  print *, "s=", s
+  call cmp (a, s)
+!  print *, "  stride = +2:"
+  do i = 1, n/2
+ u(i:i) = a(2*i-1)
+  end do
+!  print *, "u=", u
+  call cmp (a(1:n:2), u)
+!  print *
+!  print *, "# Backward:"
+  b = a(n:1:-1)
+!  print *, "b=", b
+  t = transfer (b, t)
+!  print *, "t=", t
+  call cmp (b, t)
+!  print *, "  stride = -1:"
+  call cmp (a(n:1:-1), t)
+contains
+  subroutine cmp (b, s)
+character(len=1), dimension(:), intent(in) :: b
+character(len=*),   intent(in) :: s
+character(len=size(b)) :: c
+c = transfer (b, c)
+if (c /= s) then
+  print *, "c=", c, "", merge ("  ok","BUG!", c == s)
+  call abort ()
+end if
+  end subroutine cmp
+end program gfcbug


Re: [C++11][4.9] Add missing REDUC_PLUS_EXPR case to potential_constant_expression_1.

2013-03-15 Thread Richard Biener
On Thu, Mar 14, 2013 at 10:08 PM, Marc Glisse  wrote:
> On Thu, 14 Mar 2013, Jakub Jelinek wrote:
>
>> I wonder if it wouldn't be better to fold the target builtins only later
>> on
>> (e.g. guard the folding with cfun && gimple_in_ssa_p (cfun) (or if we have
>> any predicate that is set starting with gimplification or so)).
>> Having all the FEs have to deal with myriads of weird tree codes etc.
>> isn't
>> IMHO desirable.
>
>
> Wouldn't that prevent from using those builtins in constant expressions?
> That seems undesirable. Maybe an alternative could be to push some of the
> functionality from potential_constant_expression_1 to the middle-end?

True, but is that bad?

If we want to delay such folding then please don't do it with a magic flag
but instead do the folding only via fold_stmt - that is, add a new target hook
that folds a gimple call.  I bet we have only a very limited set of target call
foldings, so transitioning them all to fold gimple calls would be easy.

Richard.

> --
> Marc Glisse


Re: RFC: add some static probes to libstdc++

2013-03-15 Thread Jakub Jelinek
On Thu, Feb 28, 2013 at 08:32:02AM -0700, Tom Tromey wrote:
> 2013-02-27  Tom Tromey  
> 
>   * libsupc++/unwind-cxx.h: Include sys/sdt.h if detected.
>   (PROBE2): New macro.
>   * libsupc++/eh_throw.cc (__cxa_throw, __cxa_rethrow): Add probe.
>   * libsupc++/eh_catch.cc (__cxa_begin_catch): Add probe.
>   * configure.ac: Check for sys/sdt.h.
>   * configure, config.h.in: Rebuild.

This is ok.  As we are close to 4.8.0-rc1, I went ahead and committed it for
you.

Jakub


Re: [4.7, go] Backport godump.c patch

2013-03-15 Thread Rainer Orth
Ian Lance Taylor  writes:

> On Thu, Mar 14, 2013 at 9:53 AM, Rainer Orth wrote:
>
>> I found that this patch
>>
>> 2012-12-04 Ian Lance Taylor
>> * godump.c (find_dummy_types): Output a dummy type if we couldn't
>>  output the real type.
>>
>>
>>
>> fixes the problem, so I'd like to backport it.
>>
>> i386-pc-solaris2.11 bootstrap still running, ok for 4.7 branch if it
>> passes?
>
> It's fine with me though I guess it needs to be approved by a release
> branch manager.

Ok, Cc'ed.  The i386-pc-solaris2.11 bootstrap now completed
successfully, and I've also bootstrapped on x86_64-unknown-linux-gnu to
make sure nothing breaks.

While the patch doesn't fix a regression, it does fix a bootstrap
failure and has been on mainline for 3 1/2 months, so seems pretty
safe.

>> One additional question: the patch updates the copyright date by adding
>> 2012. How do we handle this for a backport? Keep it that way or update
>> for 2013 instead?
>
> Keep it as 2012 since that is when the code was written.

Will do.

Thanks.
Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [Patch, Fortran] PR56615 - Wrong-code with TRANSFER of noncontiguous arrays

2013-03-15 Thread Janus Weil
Hi Tobias,

> The issue is a regression which exists since GCC 4.4. The fix is rather
> obvious (see also PR).
>
> Build and regtested on x86-64-gnu-linux.
> OK for the trunk and the two maintained branches, 4.6 and 4.7?

yes, looks good to me (pretty much obvious).

It seems the 4.8 release is quite close, but I think this should still
be able to go in (not sure if you may need to consult the release
managers at this point). According to
http://gcc.gnu.org/ml/gcc/2013-03/msg00036.html, trunk is still in
release-branch mode, and no RC has been created AFAIK ...

Cheers,
Janus


Re: [PATCH][0/n] tree LIM TLC - series part for backporting, limit LIM

2013-03-15 Thread Richard Biener
On Thu, 14 Mar 2013, Richard Biener wrote:

> 
> This extracts pieces from the already posted patch series that are
> most worthwhile and applicable for backporting to both 4.8 and 4.7.
> It also re-implements the limiting of the maximum number of memory
> references to consider for LIM's dependence analysis.  This limiting
> is now done per loop-nest and disables optimizing outer loops
> only.  The limiting requires backporting introduction of the
> shared unanalyzable mem-ref - it works by marking that as stored
> in loops we do not want to compute dependences for - which makes
> dependence computation for mems in those loops linear, as that
> mem-ref, which conveniently has ID 0, is tested first.
> 
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
> 
> The current limit of 1000 datarefs is quite low (well, for LIM's
> purposes, that is), and I only bothered to care about -O1 for
> backports (no caching of the affine combination).  With the
> limit in place and at -O1 LIM now takes
> 
>  tree loop invariant motion:   0.55 ( 1%) usr
> 
> for the testcase in PR39326.  Four patches in total, we might
> consider not backporting the limiting, without it this
> insane testcase has, at ~2GB memory usage (peak determined by IRA)
> 
>  tree loop invariant motion: 533.30 (77%) usr
> 
> but avoids running into the DSE / combine issue (and thus stays
> manageable overall at -O1).  With limiting it requires -fno-dse
> to not blow up (>5GB of memory use).

Note that the limiting patch (below) causes code-generation differences
because it collects memory-references in a different order and
store-motion applies its transform in order of mem-ref IDs
(different order of loads / stores and different decl UIDs).  The
different ordering results in quite a big speedup because bitmaps
have a more regular form (maybe only for this testcase though).

Richard.

> 2013-03-14  Richard Biener  
> 
>   PR tree-optimization/39326
>   * tree-ssa-loop-im.c: Include diagnostic-core.h.
>   (mark_ref_stored): Optimize.
>   (gather_mem_refs_stmt): Also set all_refs_stored_bit if stored.
>   (create_vop_ref_mapping_loop, create_vop_ref_mapping): Remove
>   and fold functionality into ...
>   (gather_mem_refs_in_loops): ... this.  Iterate over loops,
>   counting memory references and punting when more than
>   --param loop-max-datarefs-for-datadeps.
>   (analyze_memory_references): Adjust.
> 
> Index: trunk/gcc/tree-ssa-loop-im.c
> ===
> *** trunk.orig/gcc/tree-ssa-loop-im.c 2013-03-14 12:52:37.0 +0100
> --- trunk/gcc/tree-ssa-loop-im.c  2013-03-14 14:23:47.533164359 +0100
> *** along with GCC; see the file COPYING3.
> *** 20,25 
> --- 20,26 
>   #include "config.h"
>   #include "system.h"
>   #include "coretypes.h"
> + #include "diagnostic-core.h"
>   #include "tm.h"
>   #include "tree.h"
>   #include "tm_p.h"
> *** record_mem_ref_loc (mem_ref_p ref, struc
> *** 1551,1561 
>   static void
>   mark_ref_stored (mem_ref_p ref, struct loop *loop)
>   {
> !   for (;
> !loop != current_loops->tree_root
> !&& !bitmap_bit_p (ref->stored, loop->num);
> !loop = loop_outer (loop))
> ! bitmap_set_bit (ref->stored, loop->num);
>   }
>   
>   /* Gathers memory references in statement STMT in LOOP, storing the
> --- 1552,1560 
>   static void
>   mark_ref_stored (mem_ref_p ref, struct loop *loop)
>   {
> !   while (loop != current_loops->tree_root
> !  && bitmap_set_bit (ref->stored, loop->num))
> ! loop = loop_outer (loop);
>   }
>   
>   /* Gathers memory references in statement STMT in LOOP, storing the
> *** gather_mem_refs_stmt (struct loop *loop,
> *** 1618,1624 
>   }
> bitmap_set_bit (memory_accesses.refs_in_loop[loop->num], ref->id);
> if (is_stored)
> ! mark_ref_stored (ref, loop);
> return;
>   }
>   
> --- 1617,1627 
>   }
> bitmap_set_bit (memory_accesses.refs_in_loop[loop->num], ref->id);
> if (is_stored)
> ! {
> !   bitmap_set_bit (memory_accesses.all_refs_stored_in_loop[loop->num],
> !   ref->id);
> !   mark_ref_stored (ref, loop);
> ! }
> return;
>   }
>   
> *** gather_mem_refs_stmt (struct loop *loop,
> *** 1627,1704 
>   static void
>   gather_mem_refs_in_loops (void)
>   {
> -   gimple_stmt_iterator bsi;
> -   basic_block bb;
> struct loop *loop;
> loop_iterator li;
> -   bitmap lrefs, alrefs, alrefso;
> - 
> -   FOR_EACH_BB (bb)
> - {
> -   loop = bb->loop_father;
> -   if (loop == current_loops->tree_root)
> - continue;
>   
> !   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
> ! gather_mem_refs_stmt (loop, gsi_stmt (bsi));
> ! }
>   
> /* Propagate the information about accessed memory references up
>the loop hierarchy.  */
> FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)

Re: [C++11][4.9] Add missing REDUC_PLUS_EXPR case to potential_constant_expression_1.

2013-03-15 Thread Gabriel Dos Reis
On Fri, Mar 15, 2013 at 3:51 AM, Richard Biener
 wrote:
> On Thu, Mar 14, 2013 at 10:08 PM, Marc Glisse  wrote:
>> On Thu, 14 Mar 2013, Jakub Jelinek wrote:
>>
>>> I wonder if it wouldn't be better to fold the target builtins only later
>>> on
>>> (e.g. guard the folding with cfun && gimple_in_ssa_p (cfun) (or if we have
>>> any predicate that is set starting with gimplification or so)).
>>> Having all the FEs have to deal with myriads of weird tree codes etc.
>>> isn't
>>> IMHO desirable.
>>
>>
>> Wouldn't that prevent from using those builtins in constant expressions?
>> That seems undesirable. Maybe an alternative could be to push some of the
>> functionality from potential_constant_expression_1 to the middle-end?
>
> True, but is that bad?
>
> If we want to delay such folding then please don't do it with a magic flag
> but instead do the folding only via fold_stmt - that is, add a new target hook
> that folds a gimple call.  I bet we have only a very limited set of target 
> call
> foldings, so transitioning them all to fold gimple calls would be easy.

upon reflection, I think we don't want to delay.  the C++ front-end
needs to know (while type checking) whether a certain operation
can be evaluated at compile time and possibly get the value of the
operation.

-- Gaby


Re: [C++11][4.9] Add missing REDUC_PLUS_EXPR case to potential_constant_expression_1.

2013-03-15 Thread Jakub Jelinek
On Fri, Mar 15, 2013 at 08:00:50AM -0500, Gabriel Dos Reis wrote:
> On Fri, Mar 15, 2013 at 3:51 AM, Richard Biener
>  wrote:
> > On Thu, Mar 14, 2013 at 10:08 PM, Marc Glisse  wrote:
> >> On Thu, 14 Mar 2013, Jakub Jelinek wrote:
> >>
> >>> I wonder if it wouldn't be better to fold the target builtins only later
> >>> on
> >>> (e.g. guard the folding with cfun && gimple_in_ssa_p (cfun) (or if we have
> >>> any predicate that is set starting with gimplification or so)).
> >>> Having all the FEs have to deal with myriads of weird tree codes etc.
> >>> isn't
> >>> IMHO desirable.
> >>
> >>
> >> Wouldn't that prevent from using those builtins in constant expressions?
> >> That seems undesirable. Maybe an alternative could be to push some of the
> >> functionality from potential_constant_expression_1 to the middle-end?
> >
> > True, but is that bad?
> >
> > If we want to delay such folding then please don't do it with a magic flag
> > but instead do the folding only via fold_stmt - that is, add a new target 
> > hook
> > that folds a gimple call.  I bet we have only a very limited set of target 
> > call
> > foldings, so transitioning them all to fold gimple calls would be easy.
> 
> upon reflection, I think we don't want to delay.  the C++ front-end
> needs to know (while type checking) whether a certain operation
> can be evaluated at compile time and possibly get the value of the
> operation.

If all arguments to the target builtin are constant, then it better not be
folded into REDUC_PLUS_EXPR, but instead just some constant.  That is just
fine.  If all arguments to the target builtin aren't constant, then it won't
be a constant expression anyway, there is no point in showing all those
weird tree codes to the FE.

Jakub


RE: [PING^1] [AArch64] Implement Bitwise AND and Set Flags

2013-03-15 Thread Ian Bolton
> Please consider this as a reminder to review the patch posted at
> following link:-
> http://gcc.gnu.org/ml/gcc-patches/2013-01/msg01374.html
> 
> The patch is slightly modified to use CC_NZ mode instead of CC.
> 
> Please review the patch and let me know if it's okay?
> 

Hi Naveen,

With the CC_NZ fix, the patch looks good apart from one thing:
the second "set" in each pattern should have the "=r,rk" constraint
rather than just "=r,r".

That said, I've attached a patch that provides more thorough test cases,
including execute ones.  When you get commit approval (which will be
after GCC goes into stage 1 again) then I can add in the test
cases.  You might as well run them now though, for more confidence
in your work.

BTW, I have an implementation of BICS that's been waiting for
GCC to hit stage 1.  I'll send that out for review soon.

NOTE: I do not have maintainer powers here, so you need someone else
to give the OK to your patch.

Cheers,
Ian
diff --git a/gcc/testsuite/gcc.target/aarch64/ands1.c 
b/gcc/testsuite/gcc.target/aarch64/ands1.c
new file mode 100644
index 000..e2bf956
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ands1.c
@@ -0,0 +1,150 @@
+/* { dg-do run } */
+/* { dg-options "-O2 --save-temps" } */
+
+extern void abort (void);
+
+int
+ands_si_test1 (int a, int b, int c)
+{
+  int d = a & b;
+
+  /* { dg-final { scan-assembler "ands\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
+  if (d == 0)
+return a + c;
+  else
+return b + d + c;
+}
+
+int
+ands_si_test2 (int a, int b, int c)
+{
+  int d = a & 0xff;
+
+  /* { dg-final { scan-assembler "ands\tw\[0-9\]+, w\[0-9\]+, 255" } } */
+  if (d == 0)
+return a + c;
+  else
+return b + d + c;
+}
+
+int
+ands_si_test3 (int a, int b, int c)
+{
+  int d = a & (b << 3);
+
+  /* { dg-final { scan-assembler "ands\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+, lsl 
3" } } */
+  if (d == 0)
+return a + c;
+  else
+return b + d + c;
+}
+
+typedef long long s64;
+
+s64
+ands_di_test1 (s64 a, s64 b, s64 c)
+{
+  s64 d = a & b;
+
+  /* { dg-final { scan-assembler "ands\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+" } } */
+  if (d == 0)
+return a + c;
+  else
+return b + d + c;
+}
+
+s64
+ands_di_test2 (s64 a, s64 b, s64 c)
+{
+  s64 d = a & 0xff;
+
+  /* { dg-final { scan-assembler "ands\tx\[0-9\]+, x\[0-9\]+, 255" } } */
+  if (d == 0)
+return a + c;
+  else
+return b + d + c;
+}
+
+s64
+ands_di_test3 (s64 a, s64 b, s64 c)
+{
+  s64 d = a & (b << 3);
+
+  /* { dg-final { scan-assembler "ands\tx\[0-9\]+, x\[0-9\]+, x\[0-9\]+, lsl 
3" } } */
+  if (d == 0)
+return a + c;
+  else
+return b + d + c;
+}
+
+int main ()
+{
+  int x;
+  s64 y;
+
+  x = ands_si_test1 (29, 4, 5);
+  if (x != 13)
+abort();
+
+  x = ands_si_test1 (5, 2, 20);
+  if (x != 25)
+abort();
+
+  x = ands_si_test2 (29, 4, 5);
+  if (x != 38)
+abort();
+
+  x = ands_si_test2 (1024, 2, 20);
+  if (x != 1044)
+abort();
+
+  x = ands_si_test3 (35, 4, 5);
+  if (x != 41)
+abort();
+
+  x = ands_si_test3 (5, 2, 20);
+  if (x != 25)
+abort();
+
+  y = ands_di_test1 (0x13029ll,
+ 0x32004ll,
+ 0x505050505ll);
+
+  if (y != ((0x13029ll & 0x32004ll) + 0x32004ll + 0x505050505ll))
+abort();
+
+  y = ands_di_test1 (0x5000500050005ll,
+ 0x2111211121112ll,
+ 0x02020ll);
+  if (y != 0x5000500052025ll)
+abort();
+
+  y = ands_di_test2 (0x13029ll,
+ 0x32004ll,
+ 0x505050505ll);
+  if (y != ((0x13029ll & 0xff) + 0x32004ll + 0x505050505ll))
+abort();
+
+  y = ands_di_test2 (0x130002900ll,
+ 0x32004ll,
+ 0x505050505ll);
+  if (y != (0x130002900ll + 0x505050505ll))
+abort();
+
+  y = ands_di_test3 (0x13029ll,
+ 0x06408ll,
+ 0x505050505ll);
+  if (y != ((0x13029ll & (0x06408ll << 3))
+   + 0x06408ll + 0x505050505ll))
+abort();
+
+  y = ands_di_test3 (0x130002900ll,
+ 0x08808ll,
+ 0x505050505ll);
+  if (y != (0x130002900ll + 0x505050505ll))
+abort();
+
+  return 0;
+}
+
+/* { dg-final { cleanup-saved-temps } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/ands2.c 
b/gcc/testsuite/gcc.target/aarch64/ands2.c
new file mode 100644
index 000..c778a54
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ands2.c
@@ -0,0 +1,156 @@
+/* { dg-do run } */
+/* { dg-options "-O2 --save-temps" } */
+
+extern void abort (void);
+
+int
+ands_si_test1 (int a, int b, int c)
+{
+  int d = a & b;
+
+  /* { dg-final { scan-assembler-not "ands\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } 
} */
+  /* { dg-final { scan-assembler "and\tw\[0-9\]+, w\[0-9\]+, w\[0-9\]+" } } */
+  if (d <= 0)
+return a + c;
+  else
+return b + d + c;
+}
+
+int
+ands_si_test2 (int a, int b, int c)
+{
+  int d = a & 0x;
+
+  /* { dg-final { scan-assembler-not "ands

Re: [PATCH, AArch64] Support SBC in the backend

2013-03-15 Thread Marcus Shawcroft

On 14/03/13 15:52, Ian Bolton wrote:

We couldn't generate SBC for AArch64 ... until now!

This patch includes the main pattern, a zero_extend form
of it and a test.

Full regression testing for Linux and bare-metal passed.

OK for trunk stage-1?

Thanks,
Ian


2013-03-14  Ian Bolton  

gcc/
* config/aarch64/aarch64.md (*sub3_carryin): New pattern.
(*subsi3_carryin_uxtw): Likewise.

testsuite/
* gcc.target/aarch64/sbc.c: New test.




OK for stage-1.

/Marcus



Re: [PATCH, AArch64] Support ROR in backend

2013-03-15 Thread Marcus Shawcroft

On 14/03/13 15:42, Ian Bolton wrote:

We couldn't generate ROR (preferred alias of EXTR when both source
registers are the same) for AArch64, when rotating by an immediate,
... until now!

This patch includes the pattern and a test.

Full regression testing for Linux and bare-metal passed.

OK for trunk stage-1?

Thanks,
Ian


2013-03-14  Ian Bolton  

gcc/
* config/aarch64/aarch64.md (*ror3_insn): New pattern.
(*rorsi3_insn_uxtw): Likewise.

testsuite/
* gcc.target/aarch64/ror.c: New test.



OK for stage-1

/Marcus



Re: [PATCH, AArch64] Support EXTR in backend

2013-03-15 Thread Marcus Shawcroft

On 14/03/13 15:34, Ian Bolton wrote:

We couldn't generate EXTR for AArch64 ... until now!

This patch includes the pattern and a test.

Full regression testing for Linux and bare-metal passed.

OK for trunk stage-1?

Thanks,
Ian


2013-03-14  Ian Bolton  

gcc/
* config/aarch64/aarch64.md (*extr5_insn): New pattern.
(*extrsi5_insn_uxtw): Likewise.

testsuite/
* gcc.target/aarch64/extr.c: New test.



OK for stage-1.

/Marcus



Re: [Patch, AArch64] Implement framework for Tree/Gimple Implementation of NEON intrinsics.

2013-03-15 Thread Marcus Shawcroft

On 14/03/13 12:49, Tejas Belagod wrote:

Hi,

Attached is a patch that implements the framework necessary for implementing
NEON Intrinsics' builtins in Tree/Gimple rather than RTL. For this it uses the
target hook TARGET_FOLD_BUILTIN and folds all the builtins for NEON Intrinsics
into equivalent trees. This framework is accompanied by an example
implementation of vaddv_f<32, 64> intrinsics using the framework.

Regression tested on aarch64-none-elf. OK for trunk?

Thanks,
Tejas Belagod
ARM.

Changelog:

2013-03-14  Tejas Belagod  

gcc/
* config/aarch64/aarch64-builtins.c (aarch64_fold_builtin): New.
* config/aarch64/aarch64-protos.h (aarch64_fold_builtin): Declare.
* config/aarch64/aarch64-simd-builtins.def: New entry for reduc_splus.
* config/aarch64/aarch64.c (TARGET_FOLD_BUILTIN): Define.
* config/aarch64/arm_neon.h (vaddv_f32, vaddvq_f32, vaddvq_f64): New.

testsuite/
* gcc.target/aarch64/vaddv-intrinsic-compile.c: New.
* gcc.target/aarch64/vaddv-intrinsic.c: New.



I think we need to wait for a resolution to this thread:
http://gcc.gnu.org/ml/gcc-patches/2013-03/msg00505.html
before we can take this patch.

/Marcus



Re: [PING^1] [AArch64] Implement Bitwise AND and Set Flags

2013-03-15 Thread Richard Henderson

On 2013-02-13 22:23, Hurugalawadi, Naveen wrote:

bove
+(define_insn "*andsi3_compare0_uxtw"
+  [(set (reg:CC_NZ CC_REGNUM)
+   (compare:CC_NZ
+(and:SI (match_operand:SI 1 "register_operand" "%r,r")
+(match_operand:SI 2 "aarch64_logical_operand" "r,K"))
+(const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r,r")
+   (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2]


PLUS?


r~



[C++ PATCH] Fix a pasto in cp_tree_equal

2013-03-15 Thread Jakub Jelinek
Hi!

While working on OpenMP 4.0 atomics support, I've run into this
pasto.  Acked by Jason privately, queued for 4.8.1 and 4.9.

2013-03-15  Jakub Jelinek  

* tree.c (cp_tree_equal): Fix a pasto.

--- gcc/cp/tree.c.jj2013-03-11 10:04:11.0 +0100
+++ gcc/cp/tree.c   2013-03-15 12:18:16.958721269 +0100
@@ -2486,7 +2486,7 @@ cp_tree_equal (tree t1, tree t2)
 t1 = TREE_OPERAND (t1, 0);
   for (code2 = TREE_CODE (t2);
CONVERT_EXPR_CODE_P (code2)
-|| code1 == NON_LVALUE_EXPR;
+|| code2 == NON_LVALUE_EXPR;
code2 = TREE_CODE (t2))
 t2 = TREE_OPERAND (t2, 0);
 

Jakub


[gomp4] C++ OpenMP 4.0 atomics support

2013-03-15 Thread Jakub Jelinek
Hi!

As the updated comments show, OpenMP 4.0 (rc2 so far) has added a bunch of
new #pragma omp {,update,capture} forms.  Here is C++ support for that,
depending on http://gcc.gnu.org/ml/gcc-patches/2013-03/msg00546.html
Queued for gomp-4_0-branch (to be created next week).  Comments?

I'm afraid C support will be harder, given the lack of tentative parsing
support and no counterpart to cp_tree_equal (operand_equal_p won't do it,
that one returns false on side-effects etc.).

2013-03-15  Jakub Jelinek  

* c-parser.c (c_parser_omp_atomic): Adjust comment.
Add another argument to c_finish_omp_atomic.

* parser.c (cp_parser_binary_expression): Handle no_toplevel_fold_p
even for binary operations other than comparison.
(cp_parser_omp_atomic): Handle parsing OpenMP 4.0 atomics.
* pt.c (tsubst_expr) : Handle atomic exchange.
* semantics.c (finish_omp_atomic): Use cp_tree_equal to diagnose
expression mismatches and to find out if c_finish_omp_atomic
should be called with swapped set to true or false.

* c-omp.c (c_finish_omp_atomic): Add swapped argument, if true,
build the operation first with rhs, lhs arguments and use NOP_EXPR
build_modify_expr.
* c-common.h (c_finish_omp_atomic): Adjust prototype.

* c-c++-common/gomp/atomic-15.c: Remove error test that is now
valid in OpenMP 4.0.

* testsuite/libgomp.c++/atomic-10.C: New test.
* testsuite/libgomp.c++/atomic-11.C: New test.
* testsuite/libgomp.c++/atomic-12.C: New test.
* testsuite/libgomp.c++/atomic-13.C: New test.

--- gcc/c/c-parser.c.jj 2013-02-13 09:29:17.0 +0100
+++ gcc/c/c-parser.c2013-03-15 14:40:29.145039411 +0100
@@ -9500,10 +9500,18 @@ c_parser_omp_structured_block (c_parser
update-stmt:
  expression-stmt | x = x binop expr
capture-stmt:
- v = x binop= expr | v = x++ | v = ++x | v = x-- | v = --x
+ v = expression-stmt
capture-block:
  { v = x; update-stmt; } | { update-stmt; v = x; }
 
+   OpenMP 4.0:
+   update-stmt:
+ expression-stmt | x = x binop expr | x = expr binop x
+   capture-stmt:
+ v = update-stmt
+   capture-block:
+ { v = x; update-stmt; } | { update-stmt; v = x; } | { v = x; x = expr; }
+
   where x and v are lvalue expressions with scalar type.
 
   LOC is the location of the #pragma token.  */
@@ -9826,7 +9834,8 @@ stmt_done:
   c_parser_require (parser, CPP_CLOSE_BRACE, "expected %<}%>");
 }
 done:
-  stmt = c_finish_omp_atomic (loc, code, opcode, lhs, rhs, v, lhs1, rhs1);
+  stmt = c_finish_omp_atomic (loc, code, opcode, lhs, rhs, v, lhs1, rhs1,
+ false);
   if (stmt != error_mark_node)
 add_stmt (stmt);
 
--- gcc/cp/parser.c.jj  2013-03-07 15:32:40.0 +0100
+++ gcc/cp/parser.c 2013-03-15 14:20:15.361064904 +0100
@@ -7471,9 +7471,11 @@ cp_parser_binary_expression (cp_parser*
   */
   if (no_toplevel_fold_p
  && lookahead_prec <= current.prec
- && sp == stack
- && TREE_CODE_CLASS (current.tree_type) == tcc_comparison)
-   current.lhs = build2 (current.tree_type, boolean_type_node,
+ && sp == stack)
+   current.lhs = build2 (current.tree_type,
+ TREE_CODE_CLASS (current.tree_type)
+ == tcc_comparison
+ ? boolean_type_node : TREE_TYPE (current.lhs),
  current.lhs, rhs);
   else
current.lhs = build_x_binary_op (current.loc, current.tree_type,
@@ -26426,10 +26428,18 @@ cp_parser_omp_structured_block (cp_parse
update-stmt:
  expression-stmt | x = x binop expr
capture-stmt:
- v = x binop= expr | v = x++ | v = ++x | v = x-- | v = --x
+ v = expression-stmt
capture-block:
  { v = x; update-stmt; } | { update-stmt; v = x; }
 
+   OpenMP 4.0:
+   update-stmt:
+ expression-stmt | x = x binop expr | x = expr binop x
+   capture-stmt:
+ v = update-stmt
+   capture-block:
+ { v = x; update-stmt; } | { update-stmt; v = x; } | { v = x; x = expr; }
+
   where x and v are lvalue expressions with scalar type.  */
 
 static void
@@ -26596,75 +26606,139 @@ restart:
  opcode = BIT_XOR_EXPR;
  break;
case CPP_EQ:
- if (structured_block || code == OMP_ATOMIC)
+ enum cp_parser_prec oprec;
+ cp_token *token;
+ cp_lexer_consume_token (parser->lexer);
+ cp_parser_parse_tentatively (parser);
+ rhs1 = cp_parser_simple_cast_expression (parser);
+ if (rhs1 == error_mark_node)
{
- enum cp_parser_prec oprec;
- cp_token *token;
- cp_lexer_consume_token (parser->lexer);
- rhs1 = cp_parser_unary_expression (parser, /*address_p=*/false,
-/*cast_p=*/false, NULL);
- if (rhs1 == error_mark_node)
-   goto s

Re: [PATCH, AArch64] Support SBC in the backend

2013-03-15 Thread Gerald Pfeifer
On Fri, 15 Mar 2013, Marcus Shawcroft wrote:
> OK for stage-1.

This may be a naive question, but with AArch64 being a new port
and these changes being port-specific, with no impact on anything
else, have you considered asking the release managers to be more 
aggressive in applying things?

Of course, if you, too, are stabilizing the port and it's good
enough to be used as is and those changes are just the topping
on the ice cream, just ignore my note. :-)

Gerald


Re: [PATCH, AArch64] Support SBC in the backend

2013-03-15 Thread Richard Earnshaw

On 15/03/13 16:37, Gerald Pfeifer wrote:

On Fri, 15 Mar 2013, Marcus Shawcroft wrote:

OK for stage-1.


This may be a naive question, but with AArch64 being a new port
and these changes being port-specific, with no impact on anything
else, have you considered asking the release managers to be more
aggressive in applying things?

Of course, if you, too, are stabilizing the port and it's good
enough to be used as is and those changes are just the topping
on the ice cream, just ignore my note. :-)

Gerald



Yes, it is a new port, but it's important that 4.8.0 works and we can't 
afford a mess-up at the last minute causing problems.


I think it would be better to seek leniency for selected back-ports 
early in the 4.8.1 cycle.


R.



[google/gcc-4_7]Mark expected failures

2013-03-15 Thread Jing Yu
Got new regression failures when using gold to run gcc regression
tests. The failures are related to LIPO (b/8397853).
Since LIPO won't be available for Powerpc64 target until the end of
2013Q2, mark these tests as expected failures.

OK for google/gcc-4_7?

Tested:
Extract testresults from nightly build into /tmp/testresult/43933791, run
 ./validate_failures.py --build_dir=/tmp/testresult/43933791
--manifest powerpc64-grtev3-linux-gnu.xfail
SUCCESS: No unexpected failures.

Index: contrib/testsuite-management/powerpc64-grtev3-linux-gnu.xfail
===
--- contrib/testsuite-management/powerpc64-grtev3-linux-gnu.xfail
 (revision 196617)
+++ contrib/testsuite-management/powerpc64-grtev3-linux-gnu.xfail
 (working copy)
@@ -128,10 +128,15 @@ FAIL: g++.dg/ext/cleanup-9.C -std=gnu++98 executio
 FAIL: g++.dg/ext/cleanup-9.C -std=gnu++11 execution test
 FAIL: g++.dg/warn/Wself-assign-2.C -std=gnu++11  (test for warnings, line 12)
 FAIL: g++.dg/tree-prof/lipo/vcall1_0.C scan-ipa-dump-times profile
"Indirect call -> direct call" 2
-FAIL: g++.dg/tree-prof/mversn15.C execution,-fprofile-generate
+# b/8397853, a LIPO bug, causing compilation to fail.
+FAIL: g++.dg/tree-prof/mversn15.C compilation,  -fprofile-generate
+UNRESOLVED: g++.dg/tree-prof/mversn15.C execution,-fprofile-generate
+#FAIL: g++.dg/tree-prof/mversn15.C execution,-fprofile-generate
 UNRESOLVED: g++.dg/tree-prof/mversn15.C compilation,  -fprofile-use
 UNRESOLVED: g++.dg/tree-prof/mversn15.C execution,-fprofile-use
-FAIL: g++.dg/tree-prof/mversn15a.C execution,-fprofile-generate
+FAIL: g++.dg/tree-prof/mversn15a.C compilation,  -fprofile-generate
+UNRESOLVED: g++.dg/tree-prof/mversn15a.C execution,-fprofile-generate
+#FAIL: g++.dg/tree-prof/mversn15a.C execution,-fprofile-generate
 UNRESOLVED: g++.dg/tree-prof/mversn15a.C compilation,  -fprofile-use
 UNRESOLVED: g++.dg/tree-prof/mversn15a.C execution,-fprofile-use


Re: [google/gcc-4_7]Mark expected failures

2013-03-15 Thread Xinliang David Li
ok.

David

On Fri, Mar 15, 2013 at 10:53 AM, Jing Yu  wrote:
> Got new regression failures when using gold to run gcc regression
> tests. The failures are related to LIPO (b/8397853).
> Since LIPO won't be available for the Powerpc64 target until the end of
> 2013Q2, mark these tests as expected failures.
>
> OK for google/gcc-4_7?
>
> Tested:
> Extract testresults from nightly build into /tmp/testresult/43933791, run
>  ./validate_failures.py --build_dir=/tmp/testresult/43933791
> --manifest powerpc64-grtev3-linux-gnu.xfail
> SUCCESS: No unexpected failures.
>
> Index: contrib/testsuite-management/powerpc64-grtev3-linux-gnu.xfail
> ===
> --- contrib/testsuite-management/powerpc64-grtev3-linux-gnu.xfail
>  (revision 196617)
> +++ contrib/testsuite-management/powerpc64-grtev3-linux-gnu.xfail
>  (working copy)
> @@ -128,10 +128,15 @@ FAIL: g++.dg/ext/cleanup-9.C -std=gnu++98 executio
>  FAIL: g++.dg/ext/cleanup-9.C -std=gnu++11 execution test
>  FAIL: g++.dg/warn/Wself-assign-2.C -std=gnu++11  (test for warnings, line 12)
>  FAIL: g++.dg/tree-prof/lipo/vcall1_0.C scan-ipa-dump-times profile
> "Indirect call -> direct call" 2
> -FAIL: g++.dg/tree-prof/mversn15.C execution,-fprofile-generate
> +# b/8397853, a LIPO bug, causing compilation to fail.
> +FAIL: g++.dg/tree-prof/mversn15.C compilation,  -fprofile-generate
> +UNRESOLVED: g++.dg/tree-prof/mversn15.C execution,-fprofile-generate
> +#FAIL: g++.dg/tree-prof/mversn15.C execution,-fprofile-generate
>  UNRESOLVED: g++.dg/tree-prof/mversn15.C compilation,  -fprofile-use
>  UNRESOLVED: g++.dg/tree-prof/mversn15.C execution,-fprofile-use
> -FAIL: g++.dg/tree-prof/mversn15a.C execution,-fprofile-generate
> +FAIL: g++.dg/tree-prof/mversn15a.C compilation,  -fprofile-generate
> +UNRESOLVED: g++.dg/tree-prof/mversn15a.C execution,-fprofile-generate
> +#FAIL: g++.dg/tree-prof/mversn15a.C execution,-fprofile-generate
>  UNRESOLVED: g++.dg/tree-prof/mversn15a.C compilation,  -fprofile-use
>  UNRESOLVED: g++.dg/tree-prof/mversn15a.C execution,-fprofile-use


RE: FW: [PATCH] [MIPS] microMIPS gcc support

2013-03-15 Thread Moore, Catherine
Hi Richard,
There are a couple of embedded comments, plus new patch attached.  Are we there 
yet?
Thanks,
Catherine

> -Original Message-
> From: Richard Sandiford [mailto:rdsandif...@googlemail.com]
> Sent: Thursday, March 14, 2013 4:55 PM
> To: Moore, Catherine
> Cc: gcc-patches@gcc.gnu.org; Rozycki, Maciej
> Subject: Re: FW: [PATCH] [MIPS] microMIPS gcc support
> 
> "Moore, Catherine"  writes:
> >> -Original Message-
> >> From: Richard Sandiford [mailto:rdsandif...@googlemail.com]
> >> Sent: Tuesday, March 05, 2013 4:06 PM
> >> To: Moore, Catherine
> >> Cc: gcc-patches@gcc.gnu.org; Rozycki, Maciej
> >> Subject: Re: FW: [PATCH] [MIPS] microMIPS gcc support:
> >>
> >> We have a few internal-only undocumented constraints that aren't used
> >> much, so we should be able to move them to the "Y" space instead.
> >> The patch below does this for "T" and "U".  Then we could use "U" for
> >> new, longer constraints.
> >>
> >>
> >> U<type><factor><bits>
> >>
> >> where <type> is:
> >>
> >>   s for signed
> >>   u for unsigned
> >>   d for decremented unsigned (-1 ... N)
> >>   i for incremented unsigned (1 ... N)
> >>
> >> where <factor> is:
> >>
> >>   b for "byte" (*1)
> >>   h for "halfwords" (*2)
> >>   w for "words" (*4)
> >>   d for "doublewords" (*8) -- useful for 64-bit MIPS16 but probably not
> >>   needed for 32-bit microMIPS
> >>
> >> and where <bits> is the number of bits.  <type> and <factor> could be
> >> replaced with an ad-hoc two-letter combination for special cases.
> >> E.g. "Uas9" ("add stack") for ADDIUSP.
> >>
> >> Just a suggestion though.  I'm not saying these names are totally
> >> intuitive or anything, but they should at least be better than arbitrary
> letters.
> >>
> >> Also, <bits> could be two digits if necessary, or we could just use hex
> digits.
> >
> > I extended this proposal a bit by:
> > 1.  Adding a <type> e for encoded.  The constraint will start with Ue,
> > when the operand is an encoded value.
> > 2. I decided to use two digits for <bits>.
> > 3. The ad-hoc combination is used for anything else.
> 
> First of all, thanks for coming up with a counter-suggestion.  I'm hopeless at
> naming things, so I was hoping there would be at least some pushback.
> 
> "e" for "encoded" sounds good.  I'm less keen on the mixture of single- and
> double-digit widths though (single digit for some "Ue"s, double digits for
> other "U"s.)  I think we should either:
> 
> (a) use the same number of digits for all "U" constraints.  That leaves
> one character for the "Ue" type, which isn't as mnemonic, but is in
> line with what we do elsewhere.
> 
> (b) avoid digits in the "Ue" forms and just have an ad-hoc letter combination.
> 
> Please keep "U" for constants though.  The memory constraints should go
> under "Z" instead (and therefore be only two letters long).  The idea is that
> the first letter of the constraint tells you what type it is.
> 
> I don't think there's any need for the "Ue" constraints to have predicates of
> the same name.  We can go with longer, mnemonic, names instead.  The idea
> behind suggesting "sb4_operand", etc., was that (a) every character was
> predictable and (b) I'm not sure the extra verbosity of (say)
> "signed_byte_4_operand" would help.
> But "addiur2_operand" would be good.
> 
> > +(define_constraint "Udb07"
> > +  "@internal
> > +   A decremented unsigned constant of 7 bits."
> > +  (match_operand 0 "Udb07_operand" ""))
> 
> Very minor nit, but these "" are redundant.
> 
> > +(define_constraint "Ueim4"
> > +  "@internal
> > +   A microMIPS encoded ADDIUR2 immediate operand."
> > +  (match_operand 0 "Ueim4_operand" ""))
> 
> Again minor, but the name doesn't really seem to match the description.
> Is this constraint needed for things other than ADDIUR2?  

The constraint is only used for ADDIUR2.

If so, it might be
> worth giving a second example, otherwise it might be better to make the
> name a bit less general.  Unless this name comes from the manual, of course
> :-)  (The microMIPS link on the MIPS website was still broken last time I
> checked, but I haven't tried it again in the last couple of weeks.)
> 
> > +(define_predicate "Umem0_operand"
> > +  (and (match_code "mem")
> > +   (match_test "umips_lwsp_swsp_address_p (XEXP (op, 0),
> > +mode)")))
> > +
> > +(define_predicate "Uload_operand"
> > +  (and (match_code "mem")
> > +   (match_test "umips_address_p (XEXP (op, 0), true, mode)")))
> > +
> > +(define_predicate "Ustore_operand"
> > +  (and (match_code "mem")
> > +   (match_test "umips_address_p (XEXP (op, 0), false, mode)")))
> 
> With the two-letter Z constraints, these should have descriptive names.
> 
> > +(define_predicate "Udb07_operand"
> > +  (and (match_code "const_int")
> > +   (match_test "mips_unsigned_immediate_p (INTVAL (op) + 1, 7,
> > +0)")))
> 
> Please drop the "U"s in the predicate names.
> 
> > +(define_attr "compression" "none,all,micromips,mips16"
> > +  (const_string "none"))
> 
> Thinking about it a bit more, it would probably be better to leave th

[PATCH, ARM] ARM Linux kernel-assisted atomic operation helpers vs. libcall argument promotion

2013-03-15 Thread Julian Brown
Hi,

At present, the libcall helpers implementing atomic operations
(__sync_val_compare_and_swap_X) for char and short types suffer from
a type mismatch. This is leading to test failures, i.e.:

FAIL: gcc.dg/atomic-compare-exchange-1.c execution test
FAIL: gcc.dg/atomic-compare-exchange-2.c execution test

On investigation, these tests pass if the values used in the tests are
tweaked so that they are in the range representable by both signed and
unsigned chars, i.e. 0 to 127, rather than ~0. The failures are
happening because libcall expansion is sign-extending sub-word-size
arguments (e.g. EXPECTED, DESIRED in
optabs.c:expand_atomic_compare_and_swap), but the functions
implementing the operations are written to take unsigned arguments,
zero-extended, and the unexpected out-of-range values cause them to
fail.

The sign-extension happens because in calls.c:emit_library_call_value_1
we have:

   mode = promote_function_mode (NULL_TREE, mode, &unsigned_p, NULL_TREE, 0);
   argvec[count].mode = mode;
   argvec[count].value = convert_modes (mode, GET_MODE (val), val, unsigned_p);
   argvec[count].reg = targetm.calls.function_arg (args_so_far, mode,
   NULL_TREE, true);

This calls back into arm.c:arm_promote_function_mode, which promotes
less-than-four-byte integral values to SImode, but never modifies the
PUNSIGNEDP argument. So, such values always get sign extended when being
passed to libcalls.

The simplest fix for this (since libcalls don't have proper tree types
to inspect for the actual argument types) is just to define the
linux-atomic.c functions to use signed char/short instead of unsigned
char/unsigned short, approximately reversing the change in this earlier
patch:

http://gcc.gnu.org/ml/gcc-patches/2010-08/msg00492.html

A slight change is also required to the
__sync_val_compare_and_swap_* implementation in order to treat the
signed OLDVAL argument correctly (I believe the other macros are OK).

Tested cross to ARM Linux, default & thumb multilibs. The
above-mentioned tests change from FAIL to PASS. OK to apply?

Thanks,

Julian

ChangeLog

libgcc/
* config/arm/linux-atomic.c (SUBWORD_SYNC_OP, SUBWORD_VAL_CAS)
(SUBWORD_TEST_AND_SET): Use signed char/short types instead of
unsigned char/unsigned short.
(__sync_val_compare_and_swap_{1,2}): Handle signed argument.
Index: libgcc/config/arm/linux-atomic.c
===
--- libgcc/config/arm/linux-atomic.c	(revision 196648)
+++ libgcc/config/arm/linux-atomic.c	(working copy)
@@ -97,19 +97,19 @@ FETCH_AND_OP_WORD (nand, ~, &)
 return (RETURN & mask) >> shift;	\
   }
 
-SUBWORD_SYNC_OP (add,   , +, unsigned short, 2, oldval)
-SUBWORD_SYNC_OP (sub,   , -, unsigned short, 2, oldval)
-SUBWORD_SYNC_OP (or,, |, unsigned short, 2, oldval)
-SUBWORD_SYNC_OP (and,   , &, unsigned short, 2, oldval)
-SUBWORD_SYNC_OP (xor,   , ^, unsigned short, 2, oldval)
-SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, oldval)
-
-SUBWORD_SYNC_OP (add,   , +, unsigned char, 1, oldval)
-SUBWORD_SYNC_OP (sub,   , -, unsigned char, 1, oldval)
-SUBWORD_SYNC_OP (or,, |, unsigned char, 1, oldval)
-SUBWORD_SYNC_OP (and,   , &, unsigned char, 1, oldval)
-SUBWORD_SYNC_OP (xor,   , ^, unsigned char, 1, oldval)
-SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, oldval)
+SUBWORD_SYNC_OP (add,   , +, short, 2, oldval)
+SUBWORD_SYNC_OP (sub,   , -, short, 2, oldval)
+SUBWORD_SYNC_OP (or,, |, short, 2, oldval)
+SUBWORD_SYNC_OP (and,   , &, short, 2, oldval)
+SUBWORD_SYNC_OP (xor,   , ^, short, 2, oldval)
+SUBWORD_SYNC_OP (nand, ~, &, short, 2, oldval)
+
+SUBWORD_SYNC_OP (add,   , +, signed char, 1, oldval)
+SUBWORD_SYNC_OP (sub,   , -, signed char, 1, oldval)
+SUBWORD_SYNC_OP (or,, |, signed char, 1, oldval)
+SUBWORD_SYNC_OP (and,   , &, signed char, 1, oldval)
+SUBWORD_SYNC_OP (xor,   , ^, signed char, 1, oldval)
+SUBWORD_SYNC_OP (nand, ~, &, signed char, 1, oldval)
 
 #define OP_AND_FETCH_WORD(OP, PFX_OP, INF_OP)\
   int HIDDEN\
@@ -132,19 +132,19 @@ OP_AND_FETCH_WORD (and,   , &)
 OP_AND_FETCH_WORD (xor,   , ^)
 OP_AND_FETCH_WORD (nand, ~, &)
 
-SUBWORD_SYNC_OP (add,   , +, unsigned short, 2, newval)
-SUBWORD_SYNC_OP (sub,   , -, unsigned short, 2, newval)
-SUBWORD_SYNC_OP (or,, |, unsigned short, 2, newval)
-SUBWORD_SYNC_OP (and,   , &, unsigned short, 2, newval)
-SUBWORD_SYNC_OP (xor,   , ^, unsigned short, 2, newval)
-SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, newval)
-
-SUBWORD_SYNC_OP (add,   , +, unsigned char, 1, newval)
-SUBWORD_SYNC_OP (sub,   , -, unsigned char, 1, newval)
-SUBWORD_SYNC_OP (or,, |, unsigned char, 1, newval)
-SUBWORD_SYNC_OP (and,   , &, unsigned char, 1, newval)
-SUBWORD_SYNC_OP (xor,   , ^, unsigned char, 1, newval)
-SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, newval)
+SUBWORD_SYNC_OP (add,   , +, short, 2, newval)
+SUBWORD_SYNC_OP (sub,   , -, short, 2, newval)
+SUBWORD_SYNC_OP (or,, |, short, 2

Re: [gomp4] C++ OpenMP 4.0 atomics support

2013-03-15 Thread Toon Moene

On 03/15/2013 05:27 PM, Jakub Jelinek wrote:


Queued for gomp-4_0-branch (to be created next week).  Comments?


I heard from colleagues on the Fortran Standardization Committee 
(http://j3-fortran.org) that 4.0 doubled in size w.r.t. the 3.x standard.


I wish you lots of success implementing this - it is really hard to get 
a cross-language standard like this one correct, let alone its 
implementation.


The reports I receive on the OpenMP implementation in GCC (from the 
gfortran users' side) are without exception positive.


Thanks !

--
Toon Moene - e-mail: t...@moene.org - phone: +31 346 214290
Saturnushof 14, 3738 XG  Maartensdijk, The Netherlands
At home: http://moene.org/~toon/; weather: http://moene.org/~hirlam/
Progress of GNU Fortran: http://gcc.gnu.org/wiki/GFortran#news


*Ping* [Patch, libfortran] PR51825 - Fortran runtime error: Cannot match namelist object name

2013-03-15 Thread Tilo Schwarz

Hi,

this is a ping for

[Patch, libfortran] PR51825 - Fortran runtime error: Cannot match namelist  
object name

http://gcc.gnu.org/ml/gcc-patches/2013-03/msg00316.html


Regards,

Tilo


[google/gcc-4_7-mobile] Fix inconsistency between Makefile.am and Makefile.in under libstdc++-v3

2013-03-15 Thread 沈涵
Fix inconsistency between Makefile.am and Makefile.in under libstdc++.

The inconsistency was introduced by
svn+ssh://gcc.gnu.org/svn/gcc/branches/google/gcc-4_7@194664

Since the re-generated Makefile.in has not changed (thus not included
in the patch), this modification should have no impact.

Hi Caroline, ok for google/gcc-4_7-mobile branch?

Patch >>>
diff --git a/libstdc++-v3/libsupc++/Makefile.am
b/libstdc++-v3/libsupc++/Makefile.am
index 34abc2c..0c4339d 100644
--- a/libstdc++-v3/libsupc++/Makefile.am
+++ b/libstdc++-v3/libsupc++/Makefile.am
@@ -137,7 +137,10 @@ nested_exception.o: nested_exception.cc
 AM_CXXFLAGS = \
  $(glibcxx_lt_pic_flag) $(glibcxx_compiler_shared_flag) \
  $(XTEMPLATE_FLAGS) \
- $(WARN_CXXFLAGS) $(OPTIMIZE_CXXFLAGS)  $(CONFIG_CXXFLAGS)
+ $(WARN_CXXFLAGS) \
+ $(OPTIMIZE_CXXFLAGS) \
+ $(CONFIG_CXXFLAGS) \
+ $($(@)_no_omit_frame_pointer)

 AM_MAKEFLAGS = \
  "gxx_include_dir=$(gxx_include_dir)"

--
Han Shen


[google][4.7]Using CPU mocks to test code coverage of multiversioned functions

2013-03-15 Thread Sriraman Tallam
Hi,

   This patch is meant for google/gcc-4_7 but I want this to be
considered for trunk when it opens again. This patch makes it easy to
test for code coverage of multiversioned functions. Here is a
motivating example:

__attribute__((target ("default"))) int foo () { ... return 0; }
__attribute__((target ("sse"))) int foo () { ... return 1; }
__attribute__((target ("popcnt"))) int foo () { ... return 2; }

int main ()
{
  return foo();
}

Let's say your test CPU supports popcnt.  A run of this program will
invoke the popcnt version of foo (). Then, how do we test the sse
version of foo()? To do that for the above example, we need to run
this code on a CPU that has sse support but no popcnt support.
Otherwise, we need to comment out the popcnt version and run this
example. This can get painful when there are many versions. The same
argument applies to testing  the default version of foo.

So, I am introducing the ability to mock a CPU. If the CPU you are
testing on supports sse, you should be able to test the sse version.

First, I have introduced a new flag called -fmv-debug.  With this flag,
the function version dispatcher is invoked every time a call to foo ()
is made. Without that flag, the version dispatch happens once at
startup time via the IFUNC mechanism.

Also, with -fmv-debug, the version dispatcher uses the two new
builtins "__builtin_mock_cpu_is" and "__builtin_mock_cpu_supports" to
check the cpu type and cpu isa.

Then, I plan to add the following hooks to libgcc (in a different patch) :

int set_mock_cpu_is (const char *cpu);
int set_mock_cpu_supports (const char *isa);
int init_mock_cpu (); // Clear the values of the mock cpu.

With this support, here is how you can test for code coverage of the
"sse" version and "default" version of foo in the above example:

int main ()
{
  // Test SSE version.
   if (__builtin_cpu_supports ("sse"))
   {
 init_mock_cpu();
 set_mock_cpu_supports ("sse");
 assert (foo () == 1);
   }
  // Test default version.
  init_mock_cpu();
  assert (foo () == 0);
}

Invoking a multiversioned binary several times with appropriate mock
cpu values for the various ISAs and CPUs will give the complete code
coverage desired. Of course, the underlying platform should be able to
support the various features.

Note that the above test will work only with -fmv-debug as the
dispatcher must be invoked on every multiversioned call to be able to
dynamically change the version.

Multiple ISA features can be set in the mock cpu by calling
"set_mock_cpu_supports" several times with different ISA names.
Calling "init_mock_cpu" will clear all the values. "set_mock_cpu_is"
will set the CPU type.

This patch only includes the gcc changes.  I will separately prepare a
patch for the libgcc changes. Right now, since the libgcc changes are
not available the two new mock cpu builtins check the real CPU like
"__builtin_cpu_is" and "__builtin_cpu_supports".

Patch attached.  Please look at mv14_debug_code_coverage.C for an
exhaustive example of testing for code coverage in the presence of
multiple versions.

Comments please.

Thanks
Sri
Index: cgraphunit.c
===
--- cgraphunit.c(revision 196618)
+++ cgraphunit.c(working copy)
@@ -942,7 +942,12 @@ cgraph_analyze_function (struct cgraph_node *node)
{
  tree resolver = NULL_TREE;
  gcc_assert (targetm.generate_version_dispatcher_body);
- resolver = targetm.generate_version_dispatcher_body (node);
+ /* flag_mv_debug is 0 means that the dispatcher should be invoked
+optimally (once using ifunc support).  When flag_mv_debug is 1,
+the dispatcher should be invoked every time a call to the
+ multiversioned function is made.  */
+ resolver
+   = targetm.generate_version_dispatcher_body (node, flag_mv_debug);
  gcc_assert (resolver != NULL_TREE);
}
 }
Index: common.opt
===
--- common.opt  (revision 196618)
+++ common.opt  (working copy)
@@ -1600,6 +1600,10 @@ fmove-loop-invariants
 Common Report Var(flag_move_loop_invariants) Init(1) Optimization
 Move loop invariant computations out of loops
 
+fmv-debug
+Common RejectNegative Report Var(flag_mv_debug) Init(0)
+Invoke the function version dispatcher for every multiversioned function call.
+
 ftsan
 Common RejectNegative Report Var(flag_tsan)
 Add ThreadSanitizer instrumentation
Index: doc/tm.texi
===
--- doc/tm.texi (revision 196618)
+++ doc/tm.texi (working copy)
@@ -11032,11 +11032,13 @@ version at run-time. @var{decl} is one version fro
 identical versions.
 @end deftypefn
 
-@deftypefn {Target Hook} tree TARGET_GENERATE_VERSION_DISPATCHER_BODY (void 
*@var{arg})
+@deftypefn {Target Hook} tree TARGET_GENERATE_VERSION_DISPATCHER_BODY (void 
*@var{arg}, int @var{debug_mode})
 This hook i

Re: [rtl] combine a vec_concat of 2 vec_selects from the same vector

2013-03-15 Thread Marc Glisse

On Sun, 30 Sep 2012, Marc Glisse wrote:


On Sat, 29 Sep 2012, Eric Botcazou wrote:


this patch lets the compiler try to rewrite:

(vec_concat (vec_select x [a]) (vec_select x [b]))

as:

vec_select x [a b]

or even just "x" if appropriate.

[...]
Why not generalizing to all kinds of VEC_SELECTs instead of just scalar 
ones?


Ok, I changed the patch a bit to handle arbitrary VEC_SELECTs, and moved the 
identity recognition to VEC_SELECT handling (where it belonged). Testing with 
non-scalar VEC_SELECTs was limited though, because they are not that easy to 
generate. Also, the identity case is the only one where it actually 
optimized. To handle more cases, I'd have to look through several layers of 
VEC_SELECTs, which gets a bit complicated (for instance, the permutation 
0,1,3,2 will appear as a vec_concat of a vec_select(v,[0,1]) and a 
vec_select(vec_select(v,[2,3]),[1,0]), or worse with a vec_concat in the 
middle). It also didn't optimize 3,2,3,2, possibly because that meant 
substituting the same rtx twice (I didn't go that far in gdb). Then there is 
also the vec_duplicate case (I should try to replace vec_duplicate with 
vec_concat in simplify-rtx to see what happens...). Still, the identity case 
is nice to have.


I think I may have been a bit too hasty removing the restriction to the 
scalar case (and even that version was possibly wrong for some targets). 
For instance, if I have a vec_concat of 2 permutations of a vector, this 
will generate a vec_select with a result twice the size of its input, 
which will likely fail to be recognized. For now I have only managed to 
reach this situation in combine, where the unrecognizable expression is 
simply ignored. But I think simplify-rtx is used in less forgiving places 
(and even in combine it could cause optimizations to be missed).


My current understanding of simplify-rtx is that we should only do "safe" 
optimizations in it (make sure we only create expressions that every 
target will recognize), and if I want more advanced optimizations, I 
should do them elsewhere (not sure where). So I should probably at least 
restrict this one to the case where the result and XEXP (trueop0, 0) have 
the same mode.


Does that make sense? Or is the current code ok and I am worrying for 
nothing?


For reference, the conversation started here:
http://gcc.gnu.org/ml/gcc-patches/2012-09/msg00540.html
and I include a copy of the relevant part of the patch that was committed:


2012-09-09  Marc Glisse  

gcc/
* simplify-rtx.c (simplify_binary_operation_1)
<VEC_CONCAT>: Handle VEC_SELECTs from the same vector.


+   /* Try to merge VEC_SELECTs from the same vector into a single one.  */
+   if (GET_CODE (trueop0) == VEC_SELECT
+   && GET_CODE (trueop1) == VEC_SELECT
+   && rtx_equal_p (XEXP (trueop0, 0), XEXP (trueop1, 0)))
+ {
+   rtx par0 = XEXP (trueop0, 1);
+   rtx par1 = XEXP (trueop1, 1);
+   int len0 = XVECLEN (par0, 0);
+   int len1 = XVECLEN (par1, 0);
+   rtvec vec = rtvec_alloc (len0 + len1);
+   for (int i = 0; i < len0; i++)
+ RTVEC_ELT (vec, i) = XVECEXP (par0, 0, i);
+   for (int i = 0; i < len1; i++)
+ RTVEC_ELT (vec, len0 + i) = XVECEXP (par1, 0, i);
+   return simplify_gen_binary (VEC_SELECT, mode, XEXP (trueop0, 0),
+   gen_rtx_PARALLEL (VOIDmode, vec));
+ }


--
Marc Glisse


Re: [google][4.7]Using CPU mocks to test code coverage of multiversioned functions

2013-03-15 Thread Xinliang David Li
On Fri, Mar 15, 2013 at 2:55 PM, Sriraman Tallam  wrote:
> Hi,
>
>This patch is meant for google/gcc-4_7 but I want this to be
> considered for trunk when it opens again. This patch makes it easy to
> test for code coverage of multiversioned functions. Here is a
> motivating example:
>
> __attribute__((target ("default"))) int foo () { ... return 0; }
> __attribute__((target ("sse"))) int foo () { ... return 1; }
> __attribute__((target ("popcnt"))) int foo () { ... return 2; }
>
> int main ()
> {
>   return foo();
> }
>
> Lets say your test CPU supports popcnt.  A run of this program will
> invoke the popcnt version of foo (). Then, how do we test the sse
> version of foo()? To do that for the above example, we need to run
> this code on a CPU that has sse support but no popcnt support.
> Otherwise, we need to comment out the popcnt version and run this
> example. This can get painful when there are many versions. The same
> argument applies to testing  the default version of foo.
>
> So, I am introducing the ability to mock a CPU. If the CPU you are
> testing on supports sse, you should be able to test the sse version.
>
> First, I have introduced a new flag called -fmv-debug.  This patch
> invokes the function version dispatcher every time a call to a foo ()
> is made. Without that flag, the version dispatch happens once at
> startup time via the IFUNC mechanism.
>
> Also, with -fmv-debug, the version dispatcher uses the two new
> builtins "__builtin_mock_cpu_is" and "__builtin_mock_cpu_supports" to
> check the cpu type and cpu isa.

With this option, the compiler could probably also define some macros
that the user can use to write overriding hooks.

>
> Then, I plan to add the following hooks to libgcc (in a different patch) :
>
> int set_mock_cpu_is (const char *cpu);
> int set_mock_cpu_supports (const char *isa);
> int init_mock_cpu (); // Clear the values of the mock cpu.
>
> With this support, here is how you can test for code coverage of the
> "sse" version and "default" version of foo in the above example:
>
> int main ()
> {
>   // Test SSE version.
>if (__builtin_cpu_supports ("sse"))
>{
>  init_mock_cpu();
>  set_mock_cpu_supports ("sse");
>  assert (foo () == 1);
>}
>   // Test default version.
>   init_mock_cpu();
>   assert (foo () == 0);
> }
>
> Invoking a multiversioned binary several times with appropriate mock
> cpu values for the various ISAs and CPUs will give the complete code
> coverage desired. Ofcourse, the underlying platform should be able to
> support the various features.
>

It is the other way around -- it simplifies unit test writing and
running -- one unit test just needs to be run on the same hardware
(with the most hw features) *ONCE* and all the versions can be
covered.



> Note that the above test will work only with -fmv-debug as the
> dispatcher must be invoked on every multiversioned call to be able to
> dynamically change the version.
>
> Multiple ISA features can be set in the mock cpu by calling
> "set_mock_cpu_supports" several times with different ISA names.
> Calling "init_mock_cpu" will clear all the values. "set_mock_cpu_is"
> will set the CPU type.
>


Just thought of another idea. Is it possible for the compiler to create
some alias for each version so that they can be accessed explicitly,
just like the use of :: ?

if (__builtin_cpu_supports ("sse"))
   CHECK_RESULT (foo_sse (...));

CHECK_RESULT (foo_default(...));

...

David


> This patch only includes the gcc changes.  I will separately prepare a
> patch for the libgcc changes. Right now, since the libgcc changes are
> not available the two new mock cpu builtins check the real CPU like
> "__builtin_cpu_is" and "__builtin_cpu_supports".
>
> Patch attached.  Please look at mv14_debug_code_coverage.C for an
> exhaustive example of testing for code coverage in the presence of
> multiple versions.
>
> Comments please.
>
> Thanks
> Sri


Re: [google][4.7]Using CPU mocks to test code coverage of multiversioned functions

2013-03-15 Thread Sriraman Tallam
On Fri, Mar 15, 2013 at 3:37 PM, Xinliang David Li  wrote:
> On Fri, Mar 15, 2013 at 2:55 PM, Sriraman Tallam  wrote:
>> Hi,
>>
>>This patch is meant for google/gcc-4_7 but I want this to be
>> considered for trunk when it opens again. This patch makes it easy to
>> test for code coverage of multiversioned functions. Here is a
>> motivating example:
>>
>> __attribute__((target ("default"))) int foo () { ... return 0; }
>> __attribute__((target ("sse"))) int foo () { ... return 1; }
>> __attribute__((target ("popcnt"))) int foo () { ... return 2; }
>>
>> int main ()
>> {
>>   return foo();
>> }
>>
>> Lets say your test CPU supports popcnt.  A run of this program will
>> invoke the popcnt version of foo (). Then, how do we test the sse
>> version of foo()? To do that for the above example, we need to run
>> this code on a CPU that has sse support but no popcnt support.
>> Otherwise, we need to comment out the popcnt version and run this
>> example. This can get painful when there are many versions. The same
>> argument applies to testing  the default version of foo.
>>
>> So, I am introducing the ability to mock a CPU. If the CPU you are
>> testing on supports sse, you should be able to test the sse version.
>>
>> First, I have introduced a new flag called -fmv-debug.  This patch
>> invokes the function version dispatcher every time a call to a foo ()
>> is made. Without that flag, the version dispatch happens once at
>> startup time via the IFUNC mechanism.
>>
>> Also, with -fmv-debug, the version dispatcher uses the two new
>> builtins "__builtin_mock_cpu_is" and "__builtin_mock_cpu_supports" to
>> check the cpu type and cpu isa.
>
> With this option, compiler probably can also define some macros so
> that if user can use to write overriding hooks.
>
>>
>> Then, I plan to add the following hooks to libgcc (in a different patch) :
>>
>> int set_mock_cpu_is (const char *cpu);
>> int set_mock_cpu_supports (const char *isa);
>> int init_mock_cpu (); // Clear the values of the mock cpu.
>>
>> With this support, here is how you can test for code coverage of the
>> "sse" version and "default" version of foo in the above example:
>>
>> int main ()
>> {
>>   // Test SSE version.
>>if (__builtin_cpu_supports ("sse"))
>>{
>>  init_mock_cpu();
>>  set_mock_cpu_supports ("sse");
>>  assert (foo () == 1);
>>}
>>   // Test default version.
>>   init_mock_cpu();
>>   assert (foo () == 0);
>> }
>>
>> Invoking a multiversioned binary several times with appropriate mock
>> cpu values for the various ISAs and CPUs will give the complete code
>> coverage desired. Ofcourse, the underlying platform should be able to
>> support the various features.
>>
>
> It is the other way around -- it simplifies unit test writing and
> running -- one unit test just need to be run on the same hardware
> (with the most hw features) *ONCE* and all the versions can be
> covered.


Yes,  the test needs to run just once, potentially, if the test
platform can support all of the features.

>
>
>
>> Note that the above test will work only with -fmv-debug as the
>> dispatcher must be invoked on every multiversioned call to be able to
>> dynamically change the version.
>>
>> Multiple ISA features can be set in the mock cpu by calling
>> "set_mock_cpu_supports" several times with different ISA names.
>> Calling "init_mock_cpu" will clear all the values. "set_mock_cpu_is"
>> will set the CPU type.
>>
>
>
> Just through about another idea. Is it possible for compiler to create
> some alias for each version so that they can be accessed explicitly,
> just like the use of :: ?
>
> if (__builtin_cpu_supports ("sse"))
>CHECK_RESULT (foo_sse (...));
>
> CHECK_RESULT (foo_default(...));

This will work for this example. But, in general, this means changing
the call site of every multiversioned call and that can become
infeasible.

Thanks
Sri


>
> ...
>
> David
>
>
>> This patch only includes the gcc changes.  I will separately prepare a
>> patch for the libgcc changes. Right now, since the libgcc changes are
>> not available the two new mock cpu builtins check the real CPU like
>> "__builtin_cpu_is" and "__builtin_cpu_supports".
>>
>> Patch attached.  Please look at mv14_debug_code_coverage.C for an
>> exhaustive example of testing for code coverage in the presence of
>> multiple versions.
>>
>> Comments please.
>>
>> Thanks
>> Sri


[lra] patch to fix s390 testsuite failures

2013-03-15 Thread Vladimir Makarov
The following patch fixes all s390 GCC testsuite failures (in comparison
with reload).  The problem was an unaligned access in shared library
code, which was the result of an unaligned stack in the generated code.


The patch was also successfully bootstrapped on x86/x86-64.

Committed as rev. 196685.

2013-03-15  Vladimir Makarov 

* lra.c (lra): Align non-empty stack frame.
* lra-spills.c (lra_spill): Align stack after spilling pseudos.

Index: ChangeLog
===
--- ChangeLog   (revision 196598)
+++ ChangeLog   (working copy)
@@ -1,3 +1,8 @@
+2013-03-15  Vladimir Makarov  
+
+   * lra.c (lra): Align non-empty stack frame.
+   * lra-spills.c (lra_spill): Align stack after spilling pseudos.
+
 2013-03-08  Vladimir Makarov  
 
* lra-constraints.c (process_alt_operands): Don't penalize
Index: lra-spills.c
===
--- lra-spills.c(revision 196598)
+++ lra-spills.c(working copy)
@@ -548,6 +548,11 @@ lra_spill (void)
   for (i = 0; i < n; i++)
 if (pseudo_slots[pseudo_regnos[i]].mem == NULL_RTX)
   assign_mem_slot (pseudo_regnos[i]);
+  if (n > 0 && crtl->stack_alignment_needed)
+/* If we have a stack frame, we must align it now.  The stack size
+   may be a part of the offset computation for register
+   elimination.  */
+assign_stack_local (BLKmode, 0, crtl->stack_alignment_needed);
   if (lra_dump_file != NULL)
 {
   for (i = 0; i < slots_num; i++)
Index: lra.c
===
--- lra.c   (revision 196598)
+++ lra.c   (working copy)
@@ -2272,6 +2272,11 @@ lra (FILE *f)
   bitmap_initialize (&lra_split_regs, &reg_obstack);
   bitmap_initialize (&lra_optional_reload_pseudos, &reg_obstack);
   live_p = false;
+  if (get_frame_size () != 0 && crtl->stack_alignment_needed)
+/* If we have a stack frame, we must align it now.  The stack size
+   may be a part of the offset computation for register
+   elimination.  */
+assign_stack_local (BLKmode, 0, crtl->stack_alignment_needed);
   for (;;)
 {
   for (;;)



[ira-improv] merged with trunk

2013-03-15 Thread Vladimir Makarov

The branch was merged with trunk @ r196686.

The branch was successfully bootstrapped on x86 and x86-64.

Committed as rev. 196689.


Re: [google][4.7]Using CPU mocks to test code coverage of multiversioned functions

2013-03-15 Thread Xinliang David Li
Ok. If the use case is to enable testing of the same application
binary (not the per function unit test) with CPU mocking at runtime
(via environment variable or application specific flags), the proposed
changes make sense.

David

On Fri, Mar 15, 2013 at 3:49 PM, Sriraman Tallam  wrote:
> On Fri, Mar 15, 2013 at 3:37 PM, Xinliang David Li  wrote:
>> On Fri, Mar 15, 2013 at 2:55 PM, Sriraman Tallam  wrote:
>>> Hi,
>>>
>>>This patch is meant for google/gcc-4_7 but I want this to be
>>> considered for trunk when it opens again. This patch makes it easy to
>>> test for code coverage of multiversioned functions. Here is a
>>> motivating example:
>>>
>>> __attribute__((target ("default"))) int foo () { ... return 0; }
>>> __attribute__((target ("sse"))) int foo () { ... return 1; }
>>> __attribute__((target ("popcnt"))) int foo () { ... return 2; }
>>>
>>> int main ()
>>> {
>>>   return foo();
>>> }
>>>
>>> Let's say your test CPU supports popcnt.  A run of this program will
>>> invoke the popcnt version of foo (). Then, how do we test the sse
>>> version of foo()? To do that for the above example, we need to run
>>> this code on a CPU that has sse support but no popcnt support.
>>> Otherwise, we need to comment out the popcnt version and run this
>>> example. This can get painful when there are many versions. The same
>>> argument applies to testing  the default version of foo.
>>>
>>> So, I am introducing the ability to mock a CPU. If the CPU you are
>>> testing on supports sse, you should be able to test the sse version.
>>>
>>> First, I have introduced a new flag called -fmv-debug.  This patch
>>> invokes the function version dispatcher every time a call to a foo ()
>>> is made. Without that flag, the version dispatch happens once at
>>> startup time via the IFUNC mechanism.
>>>
>>> Also, with -fmv-debug, the version dispatcher uses the two new
>>> builtins "__builtin_mock_cpu_is" and "__builtin_mock_cpu_supports" to
>>> check the cpu type and cpu isa.
>>
>> With this option, the compiler can probably also define some macros
>> that the user can use to write overriding hooks.
>>
>>>
>>> Then, I plan to add the following hooks to libgcc (in a different patch) :
>>>
>>> int set_mock_cpu_is (const char *cpu);
>>> int set_mock_cpu_supports (const char *isa);
>>> int init_mock_cpu (); // Clear the values of the mock cpu.
>>>
>>> With this support, here is how you can test for code coverage of the
>>> "sse" version and "default" version of foo in the above example:
>>>
>>> int main ()
>>> {
>>>   // Test SSE version.
>>>if (__builtin_cpu_supports ("sse"))
>>>{
>>>  init_mock_cpu();
>>>  set_mock_cpu_supports ("sse");
>>>  assert (foo () == 1);
>>>}
>>>   // Test default version.
>>>   init_mock_cpu();
>>>   assert (foo () == 0);
>>> }
>>>
>>> Invoking a multiversioned binary several times with appropriate mock
>>> cpu values for the various ISAs and CPUs will give the complete code
>>> coverage desired. Of course, the underlying platform should be able to
>>> support the various features.
>>>
>>
>> It is the other way around -- it simplifies unit test writing and
>> running -- one unit test just needs to be run on the same hardware
>> (with the most hw features) *ONCE* and all the versions can be
>> covered.
>
>
> Yes,  the test needs to run just once, potentially, if the test
> platform can support all of the features.
>
>>
>>
>>
>>> Note that the above test will work only with -fmv-debug as the
>>> dispatcher must be invoked on every multiversioned call to be able to
>>> dynamically change the version.
>>>
>>> Multiple ISA features can be set in the mock cpu by calling
>>> "set_mock_cpu_supports" several times with different ISA names.
>>> Calling "init_mock_cpu" will clear all the values. "set_mock_cpu_is"
>>> will set the CPU type.
>>>
>>
>>
>> Just thought about another idea. Is it possible for compiler to create
>> some alias for each version so that they can be accessed explicitly,
>> just like the use of :: ?
>>
>> if (__builtin_cpu_supports ("sse"))
>>CHECK_RESULT (foo_sse (...));
>>
>> CHECK_RESULT (foo_default(...));
>
> This will work for this example. But, in general, this means changing
> the call site of every multiversioned call and that can become
> infeasible.
>
> Thanks
> Sri
>
>
>>
>> ...
>>
>> David
>>
>>
>>> This patch only includes the gcc changes.  I will separately prepare a
>>> patch for the libgcc changes. Right now, since the libgcc changes are
>>> not available the two new mock cpu builtins check the real CPU like
>>> "__builtin_cpu_is" and "__builtin_cpu_supports".
>>>
>>> Patch attached.  Please look at mv14_debug_code_coverage.C for an
>>> exhaustive example of testing for code coverage in the presence of
>>> multiple versions.
>>>
>>> Comments please.
>>>
>>> Thanks
>>> Sri


[lra] Merged with trunk

2013-03-15 Thread Vladimir Makarov

LRA branch has been merged with trunk @ 196686.

The branch was successfully bootstrapped on x86/x86-64.

Committed as rev. 196690.


Re: *Ping* [Patch, libfortran] PR51825 - Fortran runtime error: Cannot match namelist object name

2013-03-15 Thread Jerry DeLisle

On 03/15/2013 01:42 PM, Tilo Schwarz wrote:

Hi,

this is a ping for

[Patch, libfortran] PR51825 - Fortran runtime error: Cannot match namelist
object name
http://gcc.gnu.org/ml/gcc-patches/2013-03/msg00316.html


Regards,

 Tilo



OK, once trunk opens. Thanks for the patch.  Do you have commit rights yet?

Jerry


[patch] fix libstdc++/56492

2013-03-15 Thread Jonathan Wakely
This fixes a non-conformance issue in std::packaged_task which we've
decided should be addressed for 4.8

std::function cannot be used with non-CopyConstructible targets, so
this replaces std::function in the implementation of
std::packaged_task.

PR libstdc++/56492
* include/std/future (__future_base::_Result): Add result_type
typedef.
(__future_base::_S_allocate_result): Overload for std::allocator.
(__future_base::_Task_setter): Use _Result::result_type instead of
deducing the type from the task.
(__future_base::_Task_state): Store allocator to allow shared state
to be reset.  Replace std::function with member of target object type
accessed via ...
(__future_base::_Task_state_base): New abstract base class.
(__future_base::_Task_state_base::_M_run): New virtual function to
invoke type-erased target object.
(__future_base::_Task_state_base::_M_reset): New virtual function to
create new shared_state using same target object and allocator.
(__future_base::__create_task_state): Allocate a new _Task_state.
(packaged_task::packaged_task): Use __create_task_state.
(packaged_task::reset): Use _Task_state_base::_M_reset.
* testsuite/30_threads/packaged_task/cons/56492.cc: New.

Tested x86_64-linux, committed to trunk.
commit c3e4bcc3530743e86f7d2b4dec44785a66117386
Author: Jonathan Wakely 
Date:   Sat Mar 16 02:47:42 2013 +

PR libstdc++/56492
* include/std/future (__future_base::_Result): Add result_type
typedef.
(__future_base::_S_allocate_result): Overload for std::allocator.
(__future_base::_Task_setter): Use _Result::result_type instead of
deducing the type from the task.
(__future_base::_Task_state): Store allocator to allow shared state
to be reset.  Replace std::function with member of target object type
accessed via ...
(__future_base::_Task_state_base): New abstract base class.
(__future_base::_Task_state_base::_M_run): New virtual function to
invoke type-erased target object.
(__future_base::_Task_state_base::_M_reset): New virtual function to
create new shared_state using same target object and allocator.
(__future_base::__create_task_state): Allocate a new _Task_state.
(packaged_task::packaged_task): Use __create_task_state.
(packaged_task::reset): Use _Task_state_base::_M_reset.
* testsuite/30_threads/packaged_task/cons/56492.cc: New.

diff --git a/libstdc++-v3/include/std/future b/libstdc++-v3/include/std/future
index 6cccd3d..30100fe 100644
--- a/libstdc++-v3/include/std/future
+++ b/libstdc++-v3/include/std/future
@@ -214,6 +214,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool_M_initialized;
 
   public:
+   typedef _Res result_type;
+
_Result() noexcept : _M_initialized() { }

~_Result()
@@ -281,17 +283,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 typename __traits::allocator_type __a2(__a);
 __result_type* __p = __traits::allocate(__a2, 1);
 __try
-   {
- __traits::construct(__a2, __p, __a);
-}
+ {
+   __traits::construct(__a2, __p, __a);
+ }
 __catch(...)
-{
- __traits::deallocate(__a2, __p, 1);
-  __throw_exception_again;
-}
+ {
+   __traits::deallocate(__a2, __p, 1);
+   __throw_exception_again;
+ }
 return _Ptr<__result_type>(__p);
   }
 
+template
+  static _Ptr<_Result<_Res>>
+  _S_allocate_result(const std::allocator<_Tp>& __a)
+  {
+   return _Ptr<_Result<_Res>>(new _Result<_Res>);
+  }
 
 /// Base class for state between a promise and one or more
 /// associated futures.
@@ -482,6 +490,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   class _Async_state_impl;
 
 template
+  class _Task_state_base;
+
+template
   class _Task_state;
 
 template
@@ -492,24 +503,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   static std::shared_ptr<_State_base>
   _S_make_async_state(_BoundFn&& __fn);
 
-template
+template
   struct _Task_setter;
 
 template
-  class _Task_setter_helper
-  {
-   typedef typename remove_reference<_BoundFn>::type::result_type __res;
-  public:
-   typedef _Task_setter<_Res_ptr, __res> __type;
-  };
-
-template
-  static typename _Task_setter_helper<_Res_ptr, _BoundFn>::__type
+  static _Task_setter<_Res_ptr>
   _S_task_setter(_Res_ptr& __ptr, _BoundFn&& __call)
   {
-   typedef _Task_setter_helper<_Res_ptr, _BoundFn> __helper_type;
-   typedef typename __helper_type::__type _Setter;
-   return _Setter{ __ptr, std::ref(__call) };
+   return _Task_setter<_Res_ptr>{ __ptr, std::ref(__call) };
   }
   };
 
@@ -517,6 +519,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   templa