Re: [PATCH] Use new dump scheme to emit loop unroll/peel summary info (issue6941070)

2012-12-20 Thread Bernhard Reutner-Fischer
On Mon, Dec 17, 2012 at 10:44:59PM -0800, Teresa Johnson wrote:
>Index: tree-ssa-loop-ivcanon.c
>===
>--- tree-ssa-loop-ivcanon.c(revision 194516)
>+++ tree-ssa-loop-ivcanon.c(working copy)
>@@ -639,22 +639,24 @@ unloop_loops (bitmap loop_closed_ssa_invalidated,
> 
> /* Tries to unroll LOOP completely, i.e. NITER times.
>UL determines which loops we are allowed to unroll.
>-   EXIT is the exit of the loop that should be eliminated.  
>+   EXIT is the exit of the loop that should be eliminated.
>MAXITER specfy bound on number of iterations, -1 if it is
>-   not known or too large for HOST_WIDE_INT.  */
>+   not known or too large for HOST_WIDE_INT. The location
>+   LOCUS corresponding to the loop is used when emitting
>+   a summary of the unroll to the dump file.  */
> 
> static bool
> try_unroll_loop_completely (struct loop *loop,
>   edge exit, tree niter,
>   enum unroll_level ul,
>-  HOST_WIDE_INT maxiter)
>+  HOST_WIDE_INT maxiter,
>+location_t locus)

whitespace damage?

>Index: loop-unroll.c
>===
>--- loop-unroll.c  (revision 194516)
>+++ loop-unroll.c  (working copy)
>@@ -148,6 +148,61 @@ static void combine_var_copies_in_loop_exit (struc
>basic_block);
> static rtx get_expansion (struct var_to_expand *);
> 
>+/* Emit a message summarizing the unroll or peel that will be
>+   performed for LOOP, along with the loop's location LOCUS, if
>+   appropriate given the dump or -fopt-info settings.  */
>+
>+static void
>+report_unroll_peel(struct loop *loop, location_t locus)

missing space before (

contrib/check_GNU_style.sh generally says:
Dot, space, space, new sentence.
loop-dump.01.patch:223:+   not known or too large for HOST_WIDE_INT. The 
location
loop-dump.01.patch:514:+   * of the for or while statement, if possible. To do 
this, look

Dot, space, space, end of comment.
loop-dump.01.patch:504:+/* Return location corresponding to the loop control 
condition if possible. */
loop-dump.01.patch:541:+  /* Next check the latch, to see if it is non-empty. *
loop-dump.01.patch:555:+  /* If all else fails, simply return the current 
function location. */

There should be exactly one space between function name and parentheses.
loop-dump.01.patch:329:+report_unroll_peel(struct loop *loop, location_t locus)
loop-dump.01.patch:386:+  location_t locus = get_loop_location(loop);
loop-dump.01.patch:404:+  report_unroll_peel(loop, locus);
loop-dump.01.patch:412:+  location_t locus = get_loop_location(loop);
loop-dump.01.patch:429:+  report_unroll_peel(loop, locus);
loop-dump.01.patch:533:+  if ((exit = single_exit(loop)))

>@@ -248,6 +305,7 @@ peel_loops_completely (int flags)
> 
>   if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
>   {
>+  report_unroll_peel(loop, locus);
> peel_loop_completely (loop);

whitespace damage? You seem to have this kind of whitespace error
throughout the patch. I take it you are aware of
http://gcc.gnu.org/wiki/FormattingCodeForGCC
and just forgot to have it on the machine you edited?

I seemingly have
$ cat ~/.vim/gcc_style.vim 
" put this plugin into ~/.vim/gcc_style.vim and source it into your ~/.vimrc via
" source ~/.vim/gcc_style.vim
if exists("g:loaded_gcc_style") || &cp
  finish
endif
let g:loaded_gcc_style = 1

augroup gcc_style
  autocmd BufReadPost,FileReadPost * call s:maybe_gcc_style()
augroup END
if exists("*s:maybe_gcc_style")
  finish
endif
let s:cpo_save = &cpo
set cpo&vim

function! s:maybe_gcc_style()
  let s:i = 1 + 0
  while s:i <= line("$") && s:i <= 25
let s:line = getline(s:i)
if s:line =~ '^\s*This\sfile\sis\spart\sof\sGCC.*'
  " gcc-mode
  set cino=:s,{s,n-s,>2s,^-s
  set sw=2
  set sts=2
  set cindent
  set smartindent
  set autoindent
  break
else
  let s:i = s:i + 1
endif
  endwhile
endfunction

"command! NoGCCstyle unlet! g:loaded_gcc_style | au! gcc_style
"command! DoGCCstyle runtime gcc_style.vim
let &cpo = s:cpo_save

>Index: cfgloop.c
>===
>--- cfgloop.c  (revision 194516)
>+++ cfgloop.c  (working copy)
>@@ -1666,3 +1666,59 @@ loop_exits_from_bb_p (struct loop *loop, basic_blo
> 
>   return false;
> }
>+
>+/* Return location corresponding to the loop control condition if possible. */
>+
>+location_t
>+get_loop_location (struct loop *loop)
>+{
>+  rtx insn = NULL;
>+  struct niter_desc *desc = NULL;
>+  edge exit;
>+
>+  /* For a for or while loop, we would like to return the location
>+   * of the for or while statement, if possible. To do this, look
>+   * for the branch guarding the loop back-edge.
>+   */

IIRC there is not supposed to be a * in comments.

Other than

[PATCH][ARM][thumb1] Reduce lr save for leaf function with non-far jump

2012-12-20 Thread Joey Ye
Current GCC thumb1 has an annoying problem in that it always assumes a far branch.
So it is forced to save lr, even when that is unnecessary. The most extreme case
complained by partner is:

// compiled with "-mthumb -mcpu=cortex-m0 -Os".
void foo() { for (;;); }
=>
foo:
push{lr}  // Crazy!!!
.L2:
b   .L2

The reason is that thumb1 far jump is only resolved in the very late pass
"shorten_branch". Prologue/epilogue pass doesn't actually know a branch is
far or not from its attribute. It has to conservatively save/restore lr
whenever there is a branch.

This patch tries to fix it with a simple heuristic, i.e., using function
size to decide if a far jump will likely be used. Function size information
is meaningful in prologue/epilogue pass. The heuristic uses following check
to decide if lr should be saved for far jump:

function_size * 3 >= 2048 // yes: save lr for possible far jump. No: don't
save lr for far jump

The scheme has an issue: if some corner case does break above condition,
there is no chance to fix-up but to ICE. But the heuristic condition is very
conservative. It is based on the worst normal condition, namely that each instruction
is associated with a 4 byte literal ( (2+4)/2=3, blooming size by 3 times ).
I can't think of a real case to trigger the ICE. So I think it should work.

Other approaches than the heuristic scheme are too expensive to implement
for this small size/performance issue. I did explore some, but none of them
persuaded me.

Tests passed:
* build libgcc, libstdc++, newlib, libm
* make check-gcc with cpu=cortex-m0
* Small and extreme test cases

ChangeLog:

2012-12-20  Joey Ye  

* config/arm/arm.c(thumb1_final_prescan_insn): 
Assert lr save for real far jump.
(thumb_far_jump_used_p): Count instruction size and set 
 far_jump_used.

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 327ef22..ad79451 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -21790,6 +21857,11 @@ thumb1_final_prescan_insn (rtx insn)
   else if (conds != CONDS_NOCOND)
cfun->machine->thumb1_cc_insn = NULL_RTX;
 }
+
+/* Check if unexpected far jump is used.  */
+if (cfun->machine->lr_save_eliminated
+&& get_attr_far_jump (insn) == FAR_JUMP_YES)
+  internal_error("Unexpected thumb1 far jump");
 }
 
 int
@@ -21815,6 +21887,8 @@ static int
 thumb_far_jump_used_p (void)
 {
   rtx insn;
+  bool far_jump = false;
+  unsigned int func_size = 0;
 
   /* This test is only important for leaf functions.  */
   /* assert (!leaf_function_p ()); */
@@ -21870,6 +21944,26 @@ thumb_far_jump_used_p (void)
  && get_attr_far_jump (insn) == FAR_JUMP_YES
  )
{
+ far_jump = true;
+   }
+  func_size += get_attr_length (insn);
+}
+
+  /* Attribute far_jump will always be true for thumb1 before
shorten_branch
+ pass. So checking far_jump attribute before shorten_branch isn't much
+ useful.
+ 
+ Following heuristic tries to estimate more accruately if a far jump
may 
+ finally be used. The heuristic is very conservative as there is no
chance
+ to roll-back the decision of not to use far jump.
+
+ Thumb1 long branch offset is -2048 to 2046. The worst case is each
2-byte
+ insn is assiociated with a 4 byte constant pool. Using function size 
+ 2048/3 as the threshold is conservative enough.  */
+  if (far_jump)
+{
+  if ((func_size * 3) >= 2048)
+{
  /* Record the fact that we have decided that
 the function does use far jumps.  */
  cfun->machine->far_jump_used = 1;







Re: Patch to enable unlimited polymorphism to gfortran

2012-12-20 Thread Dominique Dhumieres
Dear Paul,

Apparently you have forgotten to commit the update for
same_type_as_1.f03.

Dominique


[Patch, wwwdocs] Update Fortran part of the GCC 4.8 release notes

2012-12-20 Thread Tobias Burnus
The following patch updates the Fortran part of the GCC 4.8 release 
notes at http://gcc.gnu.org/gcc-4.8/changes.html#fortran


It adds quips for
- CLASS(*)
- The new BACKTRACE intrinsic
- A compatibility notice

I would like it if someone could comment on the latter. I think it is time 
to explicitly inform about compatibility issues with gfortran. So far, 
smaller ABI changes were done all the time [affecting very special cases 
or very experimental features] and the .mod version was different in 
every release.


(The smaller ABI changes were related to code which required modules, 
hence, the .mod version change forced users to re-compile. In fact, the 
.mod version change in 4.8 has just been done to force recompilation.* 
Thus, the past ABI breakages were and this ABI breakage is very unlikely 
to lead to run-time/link-time issues.)


Comments? Suggestions?

Tobias

* The background for 4.8's ABI changes were: The module name was missing 
from module-defined procedure-pointer variables, leading to a potential 
naming clash with same-name variables in different modules. And the 
deferred-length string ABI was changed as some systems didn't like a "." 
in the assembler name of a variable.
Index: changes.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-4.8/changes.html,v
retrieving revision 1.73
diff -p -u -r1.73 changes.html
--- changes.html	19 Dec 2012 21:54:50 -	1.73
+++ changes.html	20 Dec 2012 09:43:10 -
@@ -243,6 +243,35 @@ B b(42); // OK
 
 Fortran
   
+Compatibility notice:
+
+  Module files: The version of the module files (.mod)
+has been incremented. Fortran MODULEs compiled by earlier
+GCC versions have to be recompiled, when they are USEd by
+files compiled with GCC 4.8, because GCC 4.8 is not able to read
+.mod file of earlier GCC versions; attempting to do so
+gives an error message. Note: The ABI of the produced assembler data
+itself has not changed; object files and libraries are fully compatible
+to older versions. (Except as noted below.)
+  ABI: Some internal names (name in the assembler/object file) have
+changed for symbols declared in the specification part of a module.
+If the module – or a file using such a symbol via use
+association – is recompiled, the module and all files which
+directly use such symbols have to be recompiled. The change only
+affects the following kind of module symbols:
+
+  Procedure pointers. Note: C-interoperable function pointers
+(type(c_funptr)) are not affected nor are
+procedure-pointer components.
+  Deferred-length character strings.
+
+  
+
+The http://gcc.gnu.org/onlinedocs/gfortran/BACKTRACE.html";>
+BACKTRACE intrinsic subroutine has been added. It shows
+a backtrace at an arbitrary place in user code; program execution
+continues normally afterwards.
+ 
 The http://gcc.gnu.org/onlinedocs/gfortran/Error-and-Warning-Options.html";>
 -Wc-binding-type warning option has been added (disabled
@@ -301,6 +330,12 @@ B b(42); // OK
 http://gcc.gnu.org/onlinedocs/gfortran/TMPDIR.html";>user
 manual.
 
+http://gcc.gnu.org/wiki/Fortran2003Status";>Fortran 2003:
+  Experimental support for unlimited polymorphic variables
+  (CLASS(*)) has been added.
+
+
+
 http://gcc.gnu.org/wiki/TS29113Status";>TS 29113:
 
   Assumed types (TYPE(*)) are now supported.


Re: [PATCH] Fix PR gcov-profile/55734 for bootstrapping with older compilers (issue6980044)

2012-12-20 Thread Jakub Jelinek
On Wed, Dec 19, 2012 at 10:14:26PM -0800, Teresa Johnson wrote:
> Merged this pair into an #elif, but left the outer one (from the IN_LIBGCOV
> check) since it looks clearer.
> 
> New patch:
> 
> 2012-12-19  Teresa Johnson  
> Jakub Jelinek  
> 
> PR gcov-profile/55734
> * gcov-io.c (gcov_read_summary): Use __builtin_popcount instead
> of __builtin_popcountll when building libgcov.a, otherwise use
> popcount_hwi.
> (gcov_histo_index): When not building libgcov.a, use floor_log2
> instead of __builtin_clzll.

Okay, thanks.

Jakub


*ping* [patch, libfortran] Fix PR 30162, write with pipes

2012-12-20 Thread Thomas Koenig

Ping?

Thomas


Hi Janus,

Oops, right.  Here is the correct one.

Regards

 Thomas


wrong patch attached? It contains a hunk in frontend-passes.c, which
seems totally unrelated ...

Cheers,
Janus



2012/12/15 Thomas Koenig :

Hello world,

the attached patch fixes the regression and regtests cleanly.
No test case because I could not find anything portable
to create a FIFO in the testsuite.

OK for trunk and 4.7?

 Thomas

2012-12-15  Thomas Koenig  

 PR libfortran/30162
 * io/unix.c (raw_tell):  If the lseek is done on a
 non-seekable file, return 0.









RE: [PATCH i386]: Enable push/pop in pro/epilogue for modern CPUs

2012-12-20 Thread Melik-adamyan, Areg
We checked,  no significant gains or losses.

-Original Message-
From: H.J. Lu [mailto:hjl.to...@gmail.com] 
Sent: Friday, December 14, 2012 1:03 AM
To: Jan Hubicka
Cc: Jakub Jelinek; Xinliang David Li; GCC Patches; Teresa Johnson; 
Melik-adamyan, Areg
Subject: Re: [PATCH i386]: Enable push/pop in pro/epilogue for modern CPUs

On Thu, Dec 13, 2012 at 12:40 PM, Jan Hubicka  wrote:
>> > Here we speak about memcpy/memset only.  I never got around to 
>> > modernize strlen and friends, unfortunately...
>> >
>> > memcmp and friends are different beats.  They realy need some TLC...
>>
>> memcpy and memset in glibc are also extremely fast.
>
> The default strategy now is to inline only when the block is known to 
> be small (either constant or via profile feedback, we do not really 
> use the info on upper bound of size of the copied object that would be 
> useful but not readilly available at expansion time).
>
> You can try the test_stringop script I attached and send me the 
> results.  For

Areg, can you give it a try?  Thanks.

> me libc starts to be win only for rather large blocks (i.e. >8KB)
>

Which glibc are you using?

--
H.J.


[PATCH] Fix PR55740

2012-12-20 Thread Richard Biener

The following fixes a fixup for loops when merging two basic-blocks.
We didn't handle merging two loop headers well which the following
patch addresses.

LTO bootstrapped (which was broken before this patch) and tested
on x86_64-unknown-linux-gnu, applied.

Richard.

2012-12-20  Richard Biener  

PR middle-end/55740
* cfghooks.c (merge_blocks): Properly handle merging of
two loop headers.

* g++.dg/torture/pr55740.C: New testcase.

Index: gcc/cfghooks.c
===
*** gcc/cfghooks.c  (revision 194610)
--- gcc/cfghooks.c  (working copy)
*** merge_blocks (basic_block a, basic_block
*** 724,734 
  
cfg_hooks->merge_blocks (a, b);
  
-   /* If we merge a loop header into its predecessor, update the loop
-  structure.  */
if (current_loops != NULL)
  {
!   if (b->loop_father->header == b)
{
  remove_bb_from_loops (a);
  add_bb_to_loop  (a, b->loop_father);
--- 724,746 
  
cfg_hooks->merge_blocks (a, b);
  
if (current_loops != NULL)
  {
!   /* If the block we merge into is a loop header do nothing unless ... */
!   if (a->loop_father->header == a)
!   {
! /* ... we merge two loop headers, in which case we kill
!the inner loop.  */
! if (b->loop_father->header == b)
!   {
! b->loop_father->header = NULL;
! b->loop_father->latch = NULL;
! loops_state_set (LOOPS_NEED_FIXUP);
!   }
!   }
!   /* If we merge a loop header into its predecessor, update the loop
!structure.  */
!   else if (b->loop_father->header == b)
{
  remove_bb_from_loops (a);
  add_bb_to_loop  (a, b->loop_father);
Index: gcc/testsuite/g++.dg/torture/pr55740.C
===
*** gcc/testsuite/g++.dg/torture/pr55740.C  (revision 0)
--- gcc/testsuite/g++.dg/torture/pr55740.C  (working copy)
***
*** 0 
--- 1,19 
+ // { dg-do compile }
+ 
+ static bool st_IsPathDelimiter( char c ) { return c == '/'; }
+ bool IsValidPath( char const * filename )
+ {
+   if ( !filename || filename[0] == 0 ) 
+ return false;
+   char const * run = filename;
+   while ( run && *run )   
+ {
+   if ( run[0] == '.' )   
+   if ( run[1] != '.' || ( !st_IsPathDelimiter( run[2] ) && run[2] != 0 ) 
)   
+ return false;   
+   while ( *run && !st_IsPathDelimiter( *run ) )
+   ++run;
+   if ( *run ) 
+   ++run;
+ }
+ }


Re: [PATCH] Fix PR55740

2012-12-20 Thread Steven Bosscher
On Thu, Dec 20, 2012 at 1:43 PM, Richard Biener wrote:
> --- 724,746 
>
> cfg_hooks->merge_blocks (a, b);
>
> if (current_loops != NULL)
>   {
> !   /* If the block we merge into is a loop header do nothing unless ... 
> */
> !   if (a->loop_father->header == a)
> !   {
> ! /* ... we merge two loop headers, in which case we kill
> !the inner loop.  */

Before loops were maintained, we'd simply re-discover the nested
loops. Do we now lose this information?

How about adjusting the can_merge_blocks hook to reject merging loop headers?

Ciao!
Steven


Re: [PATCH] Fix PR55740

2012-12-20 Thread Richard Biener
On Thu, 20 Dec 2012, Steven Bosscher wrote:

> On Thu, Dec 20, 2012 at 1:43 PM, Richard Biener wrote:
> > --- 724,746 
> >
> > cfg_hooks->merge_blocks (a, b);
> >
> > if (current_loops != NULL)
> >   {
> > !   /* If the block we merge into is a loop header do nothing unless 
> > ... */
> > !   if (a->loop_father->header == a)
> > !   {
> > ! /* ... we merge two loop headers, in which case we kill
> > !the inner loop.  */
> 
> Before loops were maintained, we'd simply re-discover the nested
> loops. Do we now lose this information?

When we merge loop headers we effectively merge two loops (in this
case cross-jumping merged the latches and the headers).  We re-discover
the nest when multiple latches remain and we disambiguate loops
with multiple latches.

> How about adjusting the can_merge_blocks hook to reject merging loop headers?

I didn't want to do that, but sure - that's another possibility.  In
this case the outer loop is really unnecessary.

Richard.


Re: [Patch, wwwdocs] Update Fortran part of the GCC 4.8 release notes

2012-12-20 Thread Paul Richard Thomas
Dear Tobias,

Could you note that class(*) is complete up to the restriction to
fixed length character values only?

Thanks

Paul

On 20 December 2012 10:55, Tobias Burnus  wrote:
> The following patch updates the Fortran part of the GCC 4.8 release notes at
> http://gcc.gnu.org/gcc-4.8/changes.html#fortran
>
> It adds quips for
> - CLASS(*)
> - The new BACKTRACE intrinsic
> - A compatibility notice
>
> I would like if someone could comment on the latter. I think it is time to
> explicitly inform about compatibility issues with gfortran. So far, smaller
> ABI changes were done all the time [affecting very special cases or very
> experimental features] and the .mod version was different in every release.
>
> (The smaller ABI changes were related to code which required modules, hence,
> the .mod version change forced users to re-compile. In fact, the .mod
> version change in 4.8 has just been done to force recompilation.* Thus, the
> past ABI breakages were and this ABI breakage is very unlikely to lead to
> run-time/link-time issues.)
>
> Comments? Suggestions?
>
> Tobias
>
> * The background for 4.8's ABI changes were: The module name was missing
> from module-defined procedure-pointer variables, leading to a potential
> naming clash with same-name variables in different modules. And the
> deferred-length string ABI was changed as some systems didn't like a "." in
> the assembler name of a variable.



-- 
The knack of flying is learning how to throw yourself at the ground and miss.
   --Hitchhikers Guide to the Galaxy


[PATCH] Further restrict TER replacing over calls (PR55752)

2012-12-20 Thread Richard Biener

In the PR we perform expression replacement of an FP operation
across a builtin call that sets the FP control register.  This
patch restricts replacement across calls further, from allowing
all builtins to only allowing those without side-effects.

Allowing replacement over calls at all was to not pessimize
FP code generation for example for sqrt which is most often
expanded to a single instruction.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Comments?

Thanks,
Richard.

2012-12-20  Richard Biener  

PR middle-end/55752
* tree-ssa-ter.c (find_replaceable_in_bb): Only allow replacing
across calls with no side-effects.

Index: gcc/tree-ssa-ter.c
===
*** gcc/tree-ssa-ter.c  (revision 194632)
--- gcc/tree-ssa-ter.c  (working copy)
*** find_replaceable_in_bb (temp_expr_table_
*** 681,692 
kill_expr (tab, partition);
}
  
!   /* Increment counter if this is a non BUILT_IN call. We allow
!replacement over BUILT_IN calls since many will expand to inline
!insns instead of a true call.  */
if (is_gimple_call (stmt)
! && !((fndecl = gimple_call_fndecl (stmt))
!  && DECL_BUILT_IN (fndecl)))
cur_call_cnt++;
  
/* Now see if we are creating a new expression or not.  */
--- 681,693 
kill_expr (tab, partition);
}
  
!   /* Increment counter if this is not a BUILT_IN call without
!side-effects.  We allow replacement over BUILT_IN calls
!since many will expand to inline insns instead of a true call.  */
if (is_gimple_call (stmt)
! && (!((fndecl = gimple_call_fndecl (stmt))
!   && DECL_BUILT_IN (fndecl))
! || gimple_has_side_effects (stmt)))
cur_call_cnt++;
  
/* Now see if we are creating a new expression or not.  */


Re: [PATCH] Further restrict TER replacing over calls (PR55752)

2012-12-20 Thread Jakub Jelinek
On Thu, Dec 20, 2012 at 02:51:55PM +0100, Richard Biener wrote:
> In the PR we perform expression replacement of an FP operation
> across a builtin call that sets the FP control register.  This
> patch restricts replacement across calls further, from allowing
> all builtins to only allowing those without side-effects.
> 
> Allowing replacement over calls at all was to not pessimize
> FP code generation for example for sqrt which is most often
> expanded to a single instruction.
> 
> Bootstrap and regtest running on x86_64-unknown-linux-gnu.
> 
> Comments?

Wouldn't it be better to have there a list of known builtins over which it
is fine to do TER?  I'd bet most of memory or string builtins that don't
call malloc/free should be still ok, but they surely have side-effects.

> 2012-12-20  Richard Biener  
> 
>   PR middle-end/55752
>   * tree-ssa-ter.c (find_replaceable_in_bb): Only allow replacing
>   across calls with no side-effects.

Jakub


Re: [PATCH] Further restrict TER replacing over calls (PR55752)

2012-12-20 Thread Richard Biener
On Thu, 20 Dec 2012, Jakub Jelinek wrote:

> On Thu, Dec 20, 2012 at 02:51:55PM +0100, Richard Biener wrote:
> > In the PR we perform expression replacement of an FP operation
> > across a builtin call that sets the FP control register.  This
> > patch restricts replacement across calls further, from allowing
> > all builtins to only allowing those without side-effects.
> > 
> > Allowing replacement over calls at all was to not pessimize
> > FP code generation for example for sqrt which is most often
> > expanded to a single instruction.
> > 
> > Bootstrap and regtest running on x86_64-unknown-linux-gnu.
> > 
> > Comments?
> 
> Wouldn't it be better to have there a list of known builtins over which it
> is fine to do TER?  I'd bet most of memory or string builtins that don't
> call malloc/free should be still ok, but they surely have side-effects.

I'm not sure - the original reason was that replacing across calls
made us spill more because there was a call.  We agreed that replacing
across calls isn't usually a good idea but put in the (admittedly bad)
workaround to still allow doing so across likely-not-calls.
string builtins generally will expand to calls though.

I was thinking of even making it stronger and increment "cur_call_cnt"
when the stmt (even non-call) has side-effects (would for example
cover volatile asms or general volatile touching insns).

Richard.

> > 2012-12-20  Richard Biener  
> > 
> > PR middle-end/55752
> > * tree-ssa-ter.c (find_replaceable_in_bb): Only allow replacing
> > across calls with no side-effects.
> 
>   Jakub
> 
> 

-- 
Richard Biener 
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imend


Re: [PATCH] Further restrict TER replacing over calls (PR55752)

2012-12-20 Thread Richard Biener
On Thu, 20 Dec 2012, Richard Biener wrote:

> On Thu, 20 Dec 2012, Jakub Jelinek wrote:
> 
> > On Thu, Dec 20, 2012 at 02:51:55PM +0100, Richard Biener wrote:
> > > In the PR we perform expression replacement of an FP operation
> > > across a builtin call that sets the FP control register.  This
> > > patch restricts replacement across calls further, from allowing
> > > all builtins to only allowing those without side-effects.
> > > 
> > > Allowing replacement over calls at all was to not pessimize
> > > FP code generation for example for sqrt which is most often
> > > expanded to a single instruction.
> > > 
> > > Bootstrap and regtest running on x86_64-unknown-linux-gnu.
> > > 
> > > Comments?
> > 
> > Wouldn't it be better to have there a list of known builtins over which it
> > is fine to do TER?  I'd bet most of memory or string builtins that don't
> > call malloc/free should be still ok, but they surely have side-effects.

Btw, it would need to be a target specific list as most xmm intrinsic
builtins are fine to replace over.

Richard.


Re: [PATCH i386]: Enable push/pop in pro/epilogue for modern CPUs

2012-12-20 Thread H.J. Lu
On Thu, Dec 20, 2012 at 4:13 AM, Melik-adamyan, Areg
 wrote:
> We checked,  no significant gains or losses.
>
> -Original Message-
> From: H.J. Lu [mailto:hjl.to...@gmail.com]
> Sent: Friday, December 14, 2012 1:03 AM
> To: Jan Hubicka
> Cc: Jakub Jelinek; Xinliang David Li; GCC Patches; Teresa Johnson; 
> Melik-adamyan, Areg
> Subject: Re: [PATCH i386]: Enable push/pop in pro/epilogue for modern CPUs
>
> On Thu, Dec 13, 2012 at 12:40 PM, Jan Hubicka  wrote:
>>> > Here we speak about memcpy/memset only.  I never got around to
>>> > modernize strlen and friends, unfortunately...
>>> >
>>> > memcmp and friends are different beats.  They realy need some TLC...
>>>
>>> memcpy and memset in glibc are also extremely fast.
>>
>> The default strategy now is to inline only when the block is known to
>> be small (either constant or via profile feedback, we do not really
>> use the info on upper bound of size of the copied object that would be
>> useful but not readilly available at expansion time).
>>
>> You can try the test_stringop script I attached and send me the
>> results.  For
>
> Areg, can you give it a try?  Thanks.
>

Hi Areg,

Did you mean inlined memcpy/memset are as fast as
the ones in libc.so on both ia32 and Intel64?

Please keep in mind that memcpy/memset in libc.a
may not be optimized.  You must not use -static for
linking.

-- 
H.J.


Re: [PATCH] Further restrict TER replacing over calls (PR55752)

2012-12-20 Thread Richard Biener
On Thu, 20 Dec 2012, Richard Biener wrote:

> On Thu, 20 Dec 2012, Jakub Jelinek wrote:
> 
> > On Thu, Dec 20, 2012 at 02:51:55PM +0100, Richard Biener wrote:
> > > In the PR we perform expression replacement of an FP operation
> > > across a builtin call that sets the FP control register.  This
> > > patch restricts replacement across calls further, from allowing
> > > all builtins to only allowing those without side-effects.
> > > 
> > > Allowing replacement over calls at all was to not pessimize
> > > FP code generation for example for sqrt which is most often
> > > expanded to a single instruction.
> > > 
> > > Bootstrap and regtest running on x86_64-unknown-linux-gnu.
> > > 
> > > Comments?
> > 
> > Wouldn't it be better to have there a list of known builtins over which it
> > is fine to do TER?  I'd bet most of memory or string builtins that don't
> > call malloc/free should be still ok, but they surely have side-effects.
> 
> I'm not sure - the original reason was that replacing across calls
> made us spill more because there was a call.  We agreed that replacing
> across calls isn't usually a good idea but put in the (admittedly bad)
> workaround to still allow doing so across likely-not-calls.
> string builtins generally will expand to calls though.
> 
> I was thinking of even making it stronger and increment "cur_call_cnt"
> when the stmt (even non-call) has side-effects (would for example
> cover volatile asms or general volatile touching insns).

Like so:

Index: gcc/tree-ssa-ter.c
===
--- gcc/tree-ssa-ter.c  (revision 194632)
+++ gcc/tree-ssa-ter.c  (working copy)
@@ -681,12 +681,13 @@ find_replaceable_in_bb (temp_expr_table_
kill_expr (tab, partition);
}
 
-  /* Increment counter if this is a non BUILT_IN call. We allow
-replacement over BUILT_IN calls since many will expand to inline
-insns instead of a true call.  */
-  if (is_gimple_call (stmt)
- && !((fndecl = gimple_call_fndecl (stmt))
-  && DECL_BUILT_IN (fndecl)))
+  /* Increment counter if this is not a BUILT_IN call or a stmt with
+side-effects.  We allow replacement over BUILT_IN calls
+since many will expand to inline insns instead of a true call.  
*/
+  if (gimple_has_side_effects (stmt)
+ || (is_gimple_call (stmt)
+ && !((fndecl = gimple_call_fndecl (stmt))
+  && DECL_BUILT_IN (fndecl
cur_call_cnt++;
 
   /* Now see if we are creating a new expression or not.  */

Richard.


[patch] fix libstdc++/55741 - use Sleep on mingw

2012-12-20 Thread Jonathan Wakely
PR libstdc++/55741
* acinclude.m4 (GLIBCXX_ENABLE_LIBSTDCXX_TIME): Check for Sleep.
* config.h.in: Regenerate.
* configure: Regenerate.
* src/c++11/thread.cc (__sleep_for): Use Sleep if available.

Tested by Kai (thanks), committed to trunk.
commit 1149c65a987eba50ad0138a48729b020e7d8d0bd
Author: Jonathan Wakely 
Date:   Thu Dec 20 14:29:54 2012 +

PR libstdc++/55741
* acinclude.m4 (GLIBCXX_ENABLE_LIBSTDCXX_TIME): Check for Sleep.
* config.h.in: Regenerate.
* configure: Regenerate.
* src/c++11/thread.cc (__sleep_for): Use Sleep if available.

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index 281ee7e..2d4d7f0 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -1301,6 +1301,17 @@ AC_DEFUN([GLIBCXX_ENABLE_LIBSTDCXX_TIME], [
   AC_MSG_RESULT($ac_has_usleep)
   fi
 
+  if test x"$ac_has_nanosleep$ac_has_sleep" = x"nono"; then
+  AC_MSG_CHECKING([for Sleep])
+  AC_TRY_COMPILE([#include ],
+ [Sleep(1)],
+ [ac_has_win32_sleep=yes],[ac_has_win32_sleep=no])
+  if test x"$ac_has_win32_sleep" = x"yes"; then
+AC_DEFINE(HAVE_WIN32_SLEEP,1, [Defined if Sleep exists.])
+  fi
+  AC_MSG_RESULT($ac_has_win32_sleep)
+  fi
+
   AC_SUBST(GLIBCXX_LIBS)
 
   CXXFLAGS="$ac_save_CXXFLAGS"
diff --git a/libstdc++-v3/src/c++11/thread.cc b/libstdc++-v3/src/c++11/thread.cc
index fa86a1b..b04e6dc 100644
--- a/libstdc++-v3/src/c++11/thread.cc
+++ b/libstdc++-v3/src/c++11/thread.cc
@@ -61,6 +61,8 @@ static inline int get_nprocs()
 #ifndef _GLIBCXX_USE_NANOSLEEP
 # ifdef _GLIBCXX_HAVE_SLEEP
 #  include 
+# elif defined(_GLIBCXX_HAVE_WIN32_SLEEP)
+#  include 
 # else
 #  error "No sleep function known for this target"
 # endif
@@ -170,9 +172,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
static_cast(__ns.count())
   };
 ::nanosleep(&__ts, 0);
-#else
-# ifdef _GLIBCXX_HAVE_SLEEP
-#  ifdef _GLIBCXX_HAVE_USLEEP
+#elif defined(_GLIBCXX_HAVE_SLEEP)
+# ifdef _GLIBCXX_HAVE_USLEEP
 ::sleep(__s.count());
 if (__ns.count() > 0)
   {
@@ -181,10 +182,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   __us = 1;
 ::usleep(__us);
   }
-#  else
+# else
 ::sleep(__s.count() + (__ns >= 100));
-#  endif
 # endif
+#elif defined(_GLIBCXX_HAVE_WIN32_SLEEP)
+unsigned long ms = __ns.count() / 100;
+if (__ns.count() > 0 && ms == 0)
+  ms = 1;
+::Sleep(chrono::milliseconds(__s).count() + ms);
 #endif
   }
 


Re: [PATCH i386]: Enable push/pop in pro/epilogue for modern CPUs

2012-12-20 Thread Jan Hubicka
> Hi Areg,
> 
> Did you mean inlined memcpy/memset are as fast as
> the ones in libc.so on both ia32 and Intel64?

I would be interested in output of the stringop script.
> 
> Please keep in mind that memcpy/memset in libc.a
> may not be optimized.  You must not use -static for
> linking.

In my setup I use dynamic linking...
(this is a quite annoying property in general - people tend to use --static for
performance-critical binaries to save the expense of PIC.  It would be really cool
to have a way to call the proper stringops based on the -march switch)

Honza
> 
> -- 
> H.J.


Re: [PATCH i386]: Enable push/pop in pro/epilogue for modern CPUs

2012-12-20 Thread Jan Hubicka
> > Hi Areg,
> > 
> > Did you mean inlined memcpy/memset are as fast as
> > the ones in libc.so on both ia32 and Intel64?
> 
> I would be interested in output of the stringop script.

Also as far as I can remember, none of spec2k6 benchmarks is really stringop
bound.  On Spec2k GCC was quite bound by memset (within alloc_rtx and bitmap
operations) but mostly by collecting page faults there.  Inlining that one made
quite a lot of difference on K8 hardware, but not on later chips.

Honza


Re: [PATCH] Further restrict TER replacing over calls (PR55752)

2012-12-20 Thread Richard Biener
On Thu, 20 Dec 2012, Richard Biener wrote:

> On Thu, 20 Dec 2012, Richard Biener wrote:
> 
> > On Thu, 20 Dec 2012, Jakub Jelinek wrote:
> > 
> > > On Thu, Dec 20, 2012 at 02:51:55PM +0100, Richard Biener wrote:
> > > > In the PR we perform expression replacement of an FP operation
> > > > across a builtin call that sets the FP control register.  This
> > > > patch restricts replacement across calls further, from allowing
> > > > all builtins to only allowing those without side-effects.
> > > > 
> > > > Allowing replacement over calls at all was to not pessimize
> > > > FP code generation for example for sqrt which is most often
> > > > expanded to a single instruction.
> > > > 
> > > > Bootstrap and regtest running on x86_64-unknown-linux-gnu.
> > > > 
> > > > Comments?
> > > 
> > > Wouldn't it be better to have there a list of known builtins over which it
> > > is fine to do TER?  I'd bet most of memory or string builtins that don't
> > > call malloc/free should be still ok, but they surely have side-effects.
> > 
> > I'm not sure - the original reason was that replacing across calls
> > made us spill more because there was a call.  We agreed that replacing
> > across calls isn't usually a good idea but put in the (admittedly bad)
> > workaround to still allow doing so across likely-not-calls.
> > string builtins generally will expand to calls though.
> > 
> > I was thinking of even making it stronger and increment "cur_call_cnt"
> > when the stmt (even non-call) has side-effects (would for example
> > cover volatile asms or general volatile touching insns).

After discussing on IRC I am testing the following which adds
a target hook and just treats ldmxcsr and stmxcsr differently
as well as all volatile asms and internal functions.

Bootstrap & regtest on x86_64-unknown-linux-gnu running.

Ok for trunk?

Thanks,
Richard.

2012-12-20  Richard Biener  

PR middle-end/55752
* target.def (sched): Add scheduling_barrier_p.
* targhooks.c (default_scheduling_barrier_p): New function.
* targhooks.h (default_scheduling_barrier_p): Declare.
* doc/tm.texi.in (TARGET_SCHED_SCHEDULING_BARRIER_P): Add.
* doc/tm.texi: Update.
* tree-ssa-ter.c: Include target.h.
(find_replaceable_in_bb): Do not schedule across volatile
asms or stmts the target thinks are scheduling barriers.
Do not treat internal functions as scheduling barrier by default.
* i386/i386.c (TARGET_SCHED_SCHEDULING_BARRIER_P): Override.
(ix86_scheduling_barrier_p): New function.  Handle
IX86_BUILTIN_LDMXCSR and IX86_BUILTIN_STMXCSR.
* Makefile.in (tree-ssa-ter.o): Add $(TARGET_H) dependency.

Index: gcc/target.def
===
*** gcc/target.def  (revision 194632)
--- gcc/target.def  (working copy)
*** parallelism required in output calculati
*** 939,944 
--- 939,954 
  int, (unsigned int opc, enum machine_mode mode),
  hook_int_uint_mode_1)
  
+ /* The following member value is a function that returns whether
+the statement is considered a barrier for scheduling.  By default
+this returns false.  */
+ DEFHOOK
+ (scheduling_barrier_p,
+ "This hook is called by TER to determine whether the statement is\n\
+ a scheduling barrier.",
+ bool, (gimple stmt),
+ default_scheduling_barrier_p)
+ 
  HOOK_VECTOR_END (sched)
  
  /* Functions relating to vectorization.  */
Index: gcc/targhooks.c
===
*** gcc/targhooks.c (revision 194632)
--- gcc/targhooks.c (working copy)
*** default_canonicalize_comparison (int *,
*** 1547,1550 
--- 1547,1557 
  {
  }
  
+ /* Default version of scheduling_barrier_p.  */
+ bool
+ default_scheduling_barrier_p (gimple)
+ {
+   return false;
+ }
+ 
  #include "gt-targhooks.h"
Index: gcc/targhooks.h
===
*** gcc/targhooks.h (revision 194632)
--- gcc/targhooks.h (working copy)
*** extern const char *default_pch_valid_p (
*** 195,197 
--- 195,199 
  extern void default_asm_output_ident_directive (const char*);
  
  extern bool default_member_type_forces_blk (const_tree, enum machine_mode);
+ 
+ extern bool default_scheduling_barrier_p (gimple);
Index: gcc/doc/tm.texi.in
===
*** gcc/doc/tm.texi.in  (revision 194632)
--- gcc/doc/tm.texi.in  (working copy)
*** in its second parameter.
*** 6737,6742 
--- 6737,6744 
  
  @hook TARGET_SCHED_REASSOCIATION_WIDTH
  
+ @hook TARGET_SCHED_SCHEDULING_BARRIER_P
+ 
  @node Sections
  @section Dividing the Output into Sections (Texts, Data, @dots{})
  @c the above section title is WAY too long.  maybe cut the part between
Index: gcc/doc/tm.texi
===
*** gcc/doc/tm.texi (revision 

Re: [PATCH i386]: Enable push/pop in pro/epilogue for modern CPUs

2012-12-20 Thread H.J. Lu
On Thu, Dec 20, 2012 at 7:06 AM, Jan Hubicka  wrote:
>> > Hi Areg,
>> >
>> > Did you mean inlined memcpy/memset are as fast as
>> > the ones in libc.so on both ia32 and Intel64?
>>
>> I would be interested in output of the stringop script.
>
> Also as far as I can remember, none of spec2k6 benchmarks is really stringop
> bound.  On Spec2k GCC was quite bound by memset (within alloc_rtx and bitmap
> oprations) but mostly by collecting page faults there.  Inlining that one made
> quite a lot of difference on K8 hardware, but not on later chips.
>

There is a GCC performance regression bug on EEMBC.  It turns out
that -static was used for linking and optimized memory functions weren't
used.  Removing -static fixed the performance regression.

-- 
H.J.


Re: [Patch, wwwdocs] Update Fortran part of the GCC 4.8 release notes

2012-12-20 Thread Tobias Burnus

Dear Paul,

Paul Richard Thomas wrote:

Could you note that class(*) is complete up to the restriction to fixed length 
character values only?


Done. See http://gcc.gnu.org/gcc-4.8/changes.html#fortran and 
http://gcc.gnu.org/wiki/GFortran#GCC4.8


I admit that the BACKTRACE announcement is slightly premature, but I 
assume that Janus will commit the patch very soon.


Tobias


Re: [PATCH] Fix combined tree for LTO

2012-12-20 Thread Thomas Schwinge
Hi!

On Sat, 10 Nov 2012 10:32:07 -0800, Andrew Pinski 
 wrote:
> 2012-11-10  Andrew Pinski  
> 
> PR bootstrap/55202
> * configure.ac: Set PLUGIN_LD_SUFFIX to just "ld" if it was "ld-new"
> or "collect-ld".
> * configure: Regenerate.

> Index: configure.ac
> ===
> --- configure.ac  (revision 193392)
> +++ configure.ac  (working copy)
> @@ -2003,6 +2003,12 @@ fi])
>  
>  ORIGINAL_PLUGIN_LD_FOR_TARGET=$gcc_cv_ld
>  PLUGIN_LD_SUFFIX=`basename $gcc_cv_ld | sed -e "s,$target_alias-,,"`
> +# if the PLUGIN_LD is set ld-new, just have it as ld
> +# as that is the installed named.
> +if test x$PLUGIN_LD_SUFFIX == xld-new \
> +   || test x$PLUGIN_LD_SUFFIX == xcollect-ld ; then
> +  PLUGIN_LD_SUFFIX=ld
> +fi

Using dash, this caused:

checking for ld... /usr/bin/ld
[...]/gcc/configure: 21384: test: xld: unexpected operator
[...]/gcc/configure: 21385: test: xld: unexpected operator
checking whether we are using gold... no

Fixed in r194637:

PR bootstrap/55202
* configure.ac : Use POSIX shell syntax.
* configure: Regenerate.

diff --git gcc/configure.ac gcc/configure.ac
index c6f57bd..7abe7cf 100644
--- gcc/configure.ac
+++ gcc/configure.ac
@@ -2031,8 +2031,8 @@ ORIGINAL_PLUGIN_LD_FOR_TARGET=$gcc_cv_ld
 PLUGIN_LD_SUFFIX=`basename $gcc_cv_ld | sed -e "s,$target_alias-,,"`
 # if the PLUGIN_LD is set ld-new, just have it as ld
 # as that is the installed named.
-if test x$PLUGIN_LD_SUFFIX == xld-new \
-   || test x$PLUGIN_LD_SUFFIX == xcollect-ld ; then
+if test x$PLUGIN_LD_SUFFIX = xld-new \
+   || test x$PLUGIN_LD_SUFFIX = xcollect-ld ; then
   PLUGIN_LD_SUFFIX=ld
 fi
 AC_ARG_WITH(plugin-ld,


Grüße,
 Thomas


pgpU1wMIs6dcN.pgp
Description: PGP signature


Fix PR55761

2012-12-20 Thread Paulo Matos
2012-12-20 Paulo Matos 

PR tree-optimization/55761
* tree-tailcall.c (process_assignment): Use build_int_cst only for 
integral types,
for every other type that managed to pass all conditions use 
fold_build1.





pr55761.patch
Description: pr55761.patch


Re: Fix PR55761

2012-12-20 Thread Richard Biener
On Thu, Dec 20, 2012 at 5:06 PM, Paulo Matos  wrote:
> 2012-12-20 Paulo Matos 
>
> PR tree-optimization/55761
> * tree-tailcall.c (process_assignment): Use build_int_cst only for 
> integral types,
> for every other type that managed to pass all conditions use 
> fold_build1.

 case NEGATE_EXPR:
   if (FLOAT_TYPE_P (TREE_TYPE (op0)))
 *m = build_real (TREE_TYPE (op0), dconstm1);
+  else if (INTEGRAL_TYPE_P (TREE_TYPE (non_ass_var)))
+*m = build_int_cst (TREE_TYPE (non_ass_var), -1);
   else
-*m = build_int_cst (TREE_TYPE (op0), -1);
+*m = fold_build1 (NEGATE_EXPR, TREE_TYPE (non_ass_var), non_ass_var);

looks bogus (op0 vs. non_ass_var).  I'd rather use fold_unary here as I'm not
sure if callers handle a NEGATE_EXPR in *m.  And I'd use that unconditionally,
this last case looks like it will have very weak testing coverage.  Thus,

   *m = fold_unary (NEGATE_EXPR, TREE_TYPE (op0), op0);

and also in the MINUS_EXPR case.

Richard.


Re: [google 4.7] atomic update of profile counters (issue6965050)

2012-12-20 Thread Jan Hubicka
> On Wed, Dec 19, 2012 at 4:29 PM, Andrew Pinski  wrote:
> >
> > On Wed, Dec 19, 2012 at 12:08 PM, Rong Xu  wrote:
> > > Hi,
> > >
> > > This patch adds the supprot of atomic update the profile counters.
> > > Tested with google internal benchmarks and fdo kernel build.
> >
> > I think you should use the __atomic_ functions instead of __sync_
> > functions as they allow better performance for simple counters as you
> > can use __ATOMIC_RELAXED.
> 
> You are right. I think __ATOMIC_RELAXED should be OK here.
> Thanks for the suggestion.
> 
> >
> > And this would be useful for the trunk also.  I was going to implement
> > this exact thing this week but some other important stuff came up.
> 
> I'll post trunk patch later.

Yes, I like that patch, too. Even if the costs are quite high (and this is why
atomic updates were sort of voted down in the past) the alternative of using TLS
has problems with too-much per-thread memory.

While there are even more alternatives, like recording the changes and
committing them in blocks (say at function return), I guess some solution is
better than no solution.

Thanks,
Honza


RE: Fix PR55761

2012-12-20 Thread Paulo Matos
> -Original Message-
> From: Richard Biener [mailto:richard.guent...@gmail.com]
> Sent: 20 December 2012 16:13
> To: Paulo Matos
> Cc: gcc-patches@gcc.gnu.org
> Subject: Re: Fix PR55761
> 
> On Thu, Dec 20, 2012 at 5:06 PM, Paulo Matos  wrote:
> > 2012-12-20 Paulo Matos 
> >
> > PR tree-optimization/55761
> > * tree-tailcall.c (process_assignment): Use build_int_cst only for
> integral types,
> > for every other type that managed to pass all conditions use
> fold_build1.
> 
>  case NEGATE_EXPR:
>if (FLOAT_TYPE_P (TREE_TYPE (op0)))
>  *m = build_real (TREE_TYPE (op0), dconstm1);
> +  else if (INTEGRAL_TYPE_P (TREE_TYPE (non_ass_var)))
> +*m = build_int_cst (TREE_TYPE (non_ass_var), -1);
>else
> -*m = build_int_cst (TREE_TYPE (op0), -1);
> +*m = fold_build1 (NEGATE_EXPR, TREE_TYPE (non_ass_var),
> non_ass_var);
> 
> looks bogus (op0 vs. non_ass_var). 

Correct. My mistake applying same MINUS_EXPR pattern to NEGATE_EXPR case.

> I'd rather use fold_unary here as I'm not
> sure if callers handle a NEGATE_EXPR in *m.  And I'd use that
> unconditionally,
> this last case looks like it will have very weak testing coverage.  Thus,
> 
>*m = fold_unary (NEGATE_EXPR, TREE_TYPE (op0), op0);
> 
> and also in the MINUS_EXPR case.
> 

Sounds reasonable. That would simplify it, it seems. Will fix patch and replace 
it in PR.

> Richard.



Re: [google 4.7] atomic update of profile counters (issue6965050)

2012-12-20 Thread Andrew Pinski
On Thu, Dec 20, 2012 at 8:20 AM, Jan Hubicka  wrote:
>> On Wed, Dec 19, 2012 at 4:29 PM, Andrew Pinski  wrote:
>> >
>> > On Wed, Dec 19, 2012 at 12:08 PM, Rong Xu  wrote:
>> > > Hi,
>> > >
>> > > This patch adds the supprot of atomic update the profile counters.
>> > > Tested with google internal benchmarks and fdo kernel build.
>> >
>> > I think you should use the __atomic_ functions instead of __sync_
>> > functions as they allow better performance for simple counters as you
>> > can use __ATOMIC_RELAXED.
>>
>> You are right. I think __ATOMIC_RELAXED should be OK here.
>> Thanks for the suggestion.
>>
>> >
>> > And this would be useful for the trunk also.  I was going to implement
>> > this exact thing this week but some other important stuff came up.
>>
>> I'll post trunk patch later.
>
> Yes, I like that patch, too. Even if the costs are quite high (and this is why
> atomic updates was sort of voted down in the past) the alternative of using 
> TLS
> has problems with too-much per-thread memory.

Actually sometimes (on some processors) atomic increments are cheaper
than doing a regular increment.  Mainly because there is an
instruction which can handle it in the L2 cache rather than populating
the L1.   Octeon is one such processor where this is true.

Thanks,
Andrew Pinski

>
> While there are even more alternatives, like recording the changes and
> commmiting them in blocks (say at function return), I guess some solution is
> better than no solution.
>
> Thanks,
> Honza


[PATCH] Fix postincrement/decrement of a bitfield (PR middle-end/55750)

2012-12-20 Thread Jakub Jelinek
Hi!

As the following testcase shows, the !is_gimple_min_lval code would for bit
fields want to take address of those bitfields and dereference it, which of
course leads to ICEs.

As discussed with Richard on IRC, this code is not needed at all since
PR48814 fix, so there is no need to teach it about bitfields and instead it
can be just removed altogether.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2012-12-20  Jakub Jelinek  

PR middle-end/55750
* gimplify.c (gimplify_self_mod_expr): Don't force lvalue to
pass is_gimple_min_lval.

* gcc.c-torture/execute/pr55750.c: New test.

--- gcc/gimplify.c.jj   2012-12-20 11:38:45.0 +0100
+++ gcc/gimplify.c  2012-12-20 14:45:42.586627882 +0100
@@ -2391,25 +2391,15 @@ gimplify_self_mod_expr (tree *expr_p, gi
   rhs = TREE_OPERAND (*expr_p, 1);
 
   /* For postfix operator, we evaluate the LHS to an rvalue and then use
- that as the result value and in the postqueue operation.  We also
- make sure to make lvalue a minimal lval, see
- gcc.c-torture/execute/20040313-1.c for an example where this matters.  */
+ that as the result value and in the postqueue operation.  */
   if (postfix)
 {
-  if (!is_gimple_min_lval (lvalue))
-   {
- mark_addressable (lvalue);
- lvalue = build_fold_addr_expr_loc (input_location, lvalue);
- gimplify_expr (&lvalue, pre_p, post_p, is_gimple_val, fb_rvalue);
- lvalue = build_fold_indirect_ref_loc (input_location, lvalue);
-   }
   ret = gimplify_expr (&lhs, pre_p, post_p, is_gimple_val, fb_rvalue);
   if (ret == GS_ERROR)
return ret;
-}
 
-  if (postfix)
-lhs = get_initialized_tmp_var (lhs, pre_p, NULL);
+  lhs = get_initialized_tmp_var (lhs, pre_p, NULL);
+}
 
   /* For POINTERs increment, use POINTER_PLUS_EXPR.  */
   if (POINTER_TYPE_P (TREE_TYPE (lhs)))
--- gcc/testsuite/gcc.c-torture/execute/pr55750.c.jj2012-12-20 
14:24:03.487344949 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr55750.c   2012-12-20 
14:25:10.0 +0100
@@ -0,0 +1,29 @@
+/* PR middle-end/55750 */
+
+extern void abort (void);
+
+struct S
+{
+  int m : 1;
+  int n : 7;
+} arr[2];
+
+__attribute__((noinline, noclone)) void
+foo (unsigned i)
+{
+  arr[i].n++;
+}
+
+int
+main ()
+{
+  arr[0].m = -1;
+  arr[0].n = (1 << 6) - 1;
+  arr[1].m = 0;
+  arr[1].n = -1;
+  foo (0);
+  foo (1);
+  if (arr[0].m != -1 || arr[0].n != -(1 << 6) || arr[1].m != 0 || arr[1].n != 
0)
+abort ();
+  return 0;
+}

Jakub


Re: [Patch, wwwdocs] Update Fortran part of the GCC 4.8 release notes

2012-12-20 Thread Janus Weil
> I admit that the BACKTRACE announcement is slightly premature, but I assume
> that Janus will commit the patch very soon.

yes, it's only a matter of a few hours now ;)

Cheers,
Janus


Re: [PATCH] Use new dump scheme to emit loop unroll/peel summary info (issue6941070)

2012-12-20 Thread Teresa Johnson
On Thu, Dec 20, 2012 at 1:21 AM, Bernhard Reutner-Fischer
 wrote:

Thanks for your comments. Responses inlined below, and new patch include below.

> On Mon, Dec 17, 2012 at 10:44:59PM -0800, Teresa Johnson wrote:
>>Index: tree-ssa-loop-ivcanon.c
>>===
>>--- tree-ssa-loop-ivcanon.c(revision 194516)
>>+++ tree-ssa-loop-ivcanon.c(working copy)
>>@@ -639,22 +639,24 @@ unloop_loops (bitmap loop_closed_ssa_invalidated,
>>
>> /* Tries to unroll LOOP completely, i.e. NITER times.
>>UL determines which loops we are allowed to unroll.
>>-   EXIT is the exit of the loop that should be eliminated.
>>+   EXIT is the exit of the loop that should be eliminated.
>>MAXITER specfy bound on number of iterations, -1 if it is
>>-   not known or too large for HOST_WIDE_INT.  */
>>+   not known or too large for HOST_WIDE_INT. The location
>>+   LOCUS corresponding to the loop is used when emitting
>>+   a summary of the unroll to the dump file.  */
>>
>> static bool
>> try_unroll_loop_completely (struct loop *loop,
>>   edge exit, tree niter,
>>   enum unroll_level ul,
>>-  HOST_WIDE_INT maxiter)
>>+  HOST_WIDE_INT maxiter,
>>+location_t locus)
>
> whitespace damage?

This and the other location you pointed out below as possible
whitespace damage are because the surrounding lines use tab characters
whereas mine uses spaces. Is there a guideline on which one is correct
for gcc? I looked in the style guide but didn't find anything. The
existing code uses a mix of indentation via tabs and spaces. I have
fixed this location and the one you point out below to use a tab
character so that the diff goes away, but I haven't searched the patch
exhaustively for similar issues.

>
>>Index: loop-unroll.c
>>===
>>--- loop-unroll.c  (revision 194516)
>>+++ loop-unroll.c  (working copy)
>>@@ -148,6 +148,61 @@ static void combine_var_copies_in_loop_exit (struc
>>basic_block);
>> static rtx get_expansion (struct var_to_expand *);
>>
>>+/* Emit a message summarizing the unroll or peel that will be
>>+   performed for LOOP, along with the loop's location LOCUS, if
>>+   appropriate given the dump or -fopt-info settings.  */
>>+
>>+static void
>>+report_unroll_peel(struct loop *loop, location_t locus)
>
> missing space before (
>
> contrib/check_GNU_style.sh generally says:
> Dot, space, space, new sentence.
> loop-dump.01.patch:223:+   not known or too large for HOST_WIDE_INT. The 
> location
> loop-dump.01.patch:514:+   * of the for or while statement, if possible. To 
> do this, look
>
> Dot, space, space, end of comment.
> loop-dump.01.patch:504:+/* Return location corresponding to the loop control 
> condition if possible. */
> loop-dump.01.patch:541:+  /* Next check the latch, to see if it is non-empty. 
> *
> loop-dump.01.patch:555:+  /* If all else fails, simply return the current 
> function location. */
>
> There should be exactly one space between function name and parentheses.
> loop-dump.01.patch:329:+report_unroll_peel(struct loop *loop, location_t 
> locus)
> loop-dump.01.patch:386:+  location_t locus = get_loop_location(loop);
> loop-dump.01.patch:404:+  report_unroll_peel(loop, locus);
> loop-dump.01.patch:412:+  location_t locus = get_loop_location(loop);
> loop-dump.01.patch:429:+  report_unroll_peel(loop, locus);
> loop-dump.01.patch:533:+  if ((exit = single_exit(loop)))

I fixed all these and verified that check_GNU_style.sh no longer reports these.

>
>>@@ -248,6 +305,7 @@ peel_loops_completely (int flags)
>>
>>   if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
>>   {
>>+  report_unroll_peel(loop, locus);
>> peel_loop_completely (loop);
>
> whitespace damage? You seem to have this kind of whitespace error
> throughout the patch. I take it you are aware of
> http://gcc.gnu.org/wiki/FormattingCodeForGCC
> and just forgot to have it on the machine you edited?

This was the same issue described above (tab vs space). As noted
above, I fixed this instance too, but there may be others and I'm not
sure what is required or correct.

>
> I seemingly have
> $ cat ~/.vim/gcc_style.vim
> " put this plugin into ~/.vim/gcc_style.vim and source it into your ~/.vimrc 
> via
> " source ~/.vim/gcc_style.vim
> if exists("g:loaded_gcc_style") || &cp
>   finish
> endif
> let g:loaded_gcc_style = 1
>
> augroup gcc_style
>   autocmd BufReadPost,FileReadPost * call s:maybe_gcc_style()
> augroup END
> if exists("*s:maybe_gcc_style")
>   finish
> endif
> let s:cpo_save = &cpo
> set cpo&vim
>
> function! s:maybe_gcc_style()
>   let s:i = 1 + 0
>   while s:i <= line("$") && s:i <= 25
> let s:line = getline(s:i)
> if s:line =~ '^\s*This\sfile\sis\spart\sof\sGCC.*'
>   " gcc-mode

Re: [PATCH] Fix postincrement/decrement of a bitfield (PR middle-end/55750)

2012-12-20 Thread rguenther
Jakub Jelinek  wrote:

>Hi!
>
>As the following testcase shows, the !is_gimple_min_lval code would for
>bit
>fields want to take address of those bitfields and dereference it,
>which of
>course leads to ICEs.
>
>As discussed with Richard on IRC, this code is not needed at all since
>PR48814 fix, so there is no need to teach it about bitfields and
>instead it
>can be just removed altogether.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok.

Thanks,
Richard.

>2012-12-20  Jakub Jelinek  
>
>   PR middle-end/55750
>   * gimplify.c (gimplify_self_mod_expr): Don't force lvalue to
>   pass is_gimple_min_lval.
>
>   * gcc.c-torture/execute/pr55750.c: New test.
>
>--- gcc/gimplify.c.jj  2012-12-20 11:38:45.0 +0100
>+++ gcc/gimplify.c 2012-12-20 14:45:42.586627882 +0100
>@@ -2391,25 +2391,15 @@ gimplify_self_mod_expr (tree *expr_p, gi
>   rhs = TREE_OPERAND (*expr_p, 1);
> 
> /* For postfix operator, we evaluate the LHS to an rvalue and then use
>- that as the result value and in the postqueue operation.  We also
>- make sure to make lvalue a minimal lval, see
>- gcc.c-torture/execute/20040313-1.c for an example where this
>matters.  */
>+ that as the result value and in the postqueue operation.  */
>   if (postfix)
> {
>-  if (!is_gimple_min_lval (lvalue))
>-  {
>-mark_addressable (lvalue);
>-lvalue = build_fold_addr_expr_loc (input_location, lvalue);
>-gimplify_expr (&lvalue, pre_p, post_p, is_gimple_val, fb_rvalue);
>-lvalue = build_fold_indirect_ref_loc (input_location, lvalue);
>-  }
>   ret = gimplify_expr (&lhs, pre_p, post_p, is_gimple_val, fb_rvalue);
>   if (ret == GS_ERROR)
>   return ret;
>-}
> 
>-  if (postfix)
>-lhs = get_initialized_tmp_var (lhs, pre_p, NULL);
>+  lhs = get_initialized_tmp_var (lhs, pre_p, NULL);
>+}
> 
>   /* For POINTERs increment, use POINTER_PLUS_EXPR.  */
>   if (POINTER_TYPE_P (TREE_TYPE (lhs)))
>--- gcc/testsuite/gcc.c-torture/execute/pr55750.c.jj   2012-12-20
>14:24:03.487344949 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr55750.c  2012-12-20
>14:25:10.0 +0100
>@@ -0,0 +1,29 @@
>+/* PR middle-end/55750 */
>+
>+extern void abort (void);
>+
>+struct S
>+{
>+  int m : 1;
>+  int n : 7;
>+} arr[2];
>+
>+__attribute__((noinline, noclone)) void
>+foo (unsigned i)
>+{
>+  arr[i].n++;
>+}
>+
>+int
>+main ()
>+{
>+  arr[0].m = -1;
>+  arr[0].n = (1 << 6) - 1;
>+  arr[1].m = 0;
>+  arr[1].n = -1;
>+  foo (0);
>+  foo (1);
>+  if (arr[0].m != -1 || arr[0].n != -(1 << 6) || arr[1].m != 0 ||
>arr[1].n != 0)
>+abort ();
>+  return 0;
>+}
>
>   Jakub


-- 
Sent from my Android phone with K-9 Mail. Please excuse my brevity.


[PATCH, ARM] Initial pipeline description for Cortex-A7

2012-12-20 Thread Greta Yorsh
Currently, GCC uses generic ARMv7-A tuning for Cortex-A7.
This patch adds an initial pipeline description for Cortex-A7. Details:
* integer/vfp is based on the pipeline description for Cortex-A5,
* models dual issue in limited circumstances using simple_alu_imm and
simple_alu_shift type attribute (introduced by a previous patch),
* basic neon timings.

No regression on qemu for arm-none-eabi target with cpu cortex-a7.

Bootstrap successful on Cortex-A15 (gcc configured with cpu cortex-a7).

Performance evaluation on Cortex-A7 hardware:

Coremark: 
* No change compared to generic tuning even though the generated assembly is
significantly different due to instruction scheduling. 
* Improvement compared to tuning for Cortex-A5: 4% improvement in arm mode
and 9% improvement in thumb mode.
CINT2000:
* compared to generic tuning, overall improvement of 1.9%.
* compared to tuning for Cortex-A5, overall improvement of 1.5%.
* in both cases, all benchmarks improved except 254.gap.
CFP2000:
* compared to generic tuning (which doesn't do much for FP), overall
improvement of 5.5%, all benchmarks improved.
* compared to Cortex-A5 tuning (as pipeline descriptions are nearly
identical) overall no change, but individual benchmarks mixed results.

Ok for trunk?

Thanks,
Greta

gcc/ChangeLog

2012-12-20  Greta Yorsh  

* config/arm/cortex-a7.md: New file.
* config/arm/arm.md: Include cortex-a7.md.
(generic_sched): Don't use generic scheduler for Cortex-A7.
(generic_vfp): Likewise.
* config/arm/t-arm (arm_cpu_table): Likewise.
* config/arm/arm.c: (TARGET_SCHED_REORDER): Use arm_sched_reorder.
(arm_sched_reorder): New function.
(cortexa7_older_only,cortexa7_younger): Likewise.diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 84ce56f..ab6c88b 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -132,6 +132,7 @@ static void arm_output_function_prologue (FILE *, 
HOST_WIDE_INT);
 static int arm_comp_type_attributes (const_tree, const_tree);
 static void arm_set_default_type_attributes (tree);
 static int arm_adjust_cost (rtx, rtx, rtx, int);
+static int arm_sched_reorder (FILE *, int, rtx *, int *, int);
 static int optimal_immediate_sequence (enum rtx_code code,
   unsigned HOST_WIDE_INT val,
   struct four_ints *return_sequence);
@@ -366,6 +367,9 @@ static const struct attribute_spec arm_attribute_table[] =
 #undef  TARGET_SCHED_ADJUST_COST
 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
 
+#undef TARGET_SCHED_REORDER
+#define TARGET_SCHED_REORDER arm_sched_reorder
+
 #undef TARGET_REGISTER_MOVE_COST
 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
 
@@ -8680,6 +8684,164 @@ arm_memory_move_cost (enum machine_mode mode, 
reg_class_t rclass,
 }
 }
 
+
+/* Return true if and only if this insn can dual-issue only as older.  */
+static bool
+cortexa7_older_only (rtx insn)
+{
+  if (recog_memoized (insn) < 0)
+return false;
+
+  if (get_attr_insn (insn) == INSN_MOV)
+return false;
+
+  switch (get_attr_type (insn))
+{
+case TYPE_ALU_REG:
+case TYPE_LOAD_BYTE:
+case TYPE_LOAD1:
+case TYPE_STORE1:
+case TYPE_FFARITHS:
+case TYPE_FADDS:
+case TYPE_FFARITHD:
+case TYPE_FADDD:
+case TYPE_FCPYS:
+case TYPE_F_CVT:
+case TYPE_FCMPS:
+case TYPE_FCMPD:
+case TYPE_FCONSTS:
+case TYPE_FCONSTD:
+case TYPE_FMULS:
+case TYPE_FMACS:
+case TYPE_FMULD:
+case TYPE_FMACD:
+case TYPE_FDIVS:
+case TYPE_FDIVD:
+case TYPE_F_2_R:
+case TYPE_F_FLAG:
+case TYPE_F_LOADS:
+case TYPE_F_STORES:
+  return true;
+default:
+  return false;
+}
+}
+
+/* Return true if and only if this insn can dual-issue as younger.  */
+static bool
+cortexa7_younger (FILE *file, int verbose, rtx insn)
+{
+  if (recog_memoized (insn) < 0)
+{
+  if (verbose > 5)
+fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
+  return false;
+}
+
+  if (get_attr_insn (insn) == INSN_MOV)
+return true;
+
+  switch (get_attr_type (insn))
+{
+case TYPE_SIMPLE_ALU_IMM:
+case TYPE_SIMPLE_ALU_SHIFT:
+case TYPE_BRANCH:
+  return true;
+default:
+  return false;
+}
+}
+
+
+/* Look for an instruction that can dual issue only as an older
+   instruction, and move it in front of any instructions that can
+   dual-issue as younger, while preserving the relative order of all
+   other instructions in the ready list.  This is a hueuristic to help
+   dual-issue in later cycles, by postponing issue of more flexible
+   instructions.  This heuristic may affect dual issue opportunities
+   in the current cycle.  */
+static void
+cortexa7_sched_reorder (FILE *file, int verbose, rtx *ready, int *n_readyp,
+int clock)
+{
+  int i;
+  int first_older_only = -1, first_younger = -1;
+
+  if (verbose > 5)
+fprintf (file,
+  

[patch] fix install dependencies for target libraries

2012-12-20 Thread Matthias Klose
This was seen with the libgo installation [1], but from my point of view can
happen when the install target is called with -j >1, libtool seems to fall back
to the system libraries if the library in the install location is not available
(which is always the case if you install into an empty dir set with DESTDIR).
Currently it just works for a non-parallel install because the dependencies in
Makefile.def are created in the right order.

Ok for the trunk?

  Matthias

[1] http://gcc.gnu.org/ml/gcc-patches/2012-12/msg01192.html


2012-12-20  Matthias Klose  

	* Makefile.def (install-target-libgfortran): Depend on
	install-target-libquadmath, install-target-libgcc.
	(install-target-libsanitizer): Depend on install-target-libgcc.
	(install-target-libjava): Depend on install-target-libgcc.
	(install-target-libitm): Depend on install-target-libgcc.
	(install-target-libobjc): Depend on install-target-libgcc.
	(install-target-libstdc++-v3): Depend on install-target-libgcc.
	* Makefile.in: Regenerate.

Index: Makefile.def
===
--- Makefile.def	(Revision 194635)
+++ Makefile.def	(Arbeitskopie)
@@ -515,6 +515,13 @@
 dependencies = { module=all-target-libstdc++-v3; on=configure-target-libgomp; };
 
 dependencies = { module=install-target-libgo; on=install-target-libatomic; };
+dependencies = { module=install-target-libgfortran; on=install-target-libquadmath; };
+dependencies = { module=install-target-libgfortran; on=install-target-libgcc; };
+dependencies = { module=install-target-libsanitizer; on=install-target-libgcc; };
+dependencies = { module=install-target-libjava; on=install-target-libgcc; };
+dependencies = { module=install-target-libitm; on=install-target-libgcc; };
+dependencies = { module=install-target-libobjc; on=install-target-libgcc; };
+dependencies = { module=install-target-libstdc++-v3; on=install-target-libgcc; };
 
 // Target modules in the 'src' repository.
 lang_env_dependencies = { module=libtermcap; };


Re: [patch] fix install dependencies for target libraries

2012-12-20 Thread Ian Lance Taylor
On Thu, Dec 20, 2012 at 10:22 AM, Matthias Klose  wrote:
> This was seen with the libgo installation [1], but from my point of view can
> happen when the install target is called with -j >1, libtool seems to fall 
> back
> to the system libraries if the library in the install location is not 
> available
> (which is always the case if you install into an empty dir set with DESTDIR).
> Currently it just works for a non-parallel install because the dependencies in
> Makefile.def are created in the right order.
>
> Ok for the trunk?

This is OK with a ChangeLog entry.

Thanks.

Ian


Re: [patch] fix install dependencies for target libraries

2012-12-20 Thread Matthias Klose
Am 20.12.2012 20:11, schrieb Ian Lance Taylor:
> On Thu, Dec 20, 2012 at 10:22 AM, Matthias Klose  wrote:
>> This was seen with the libgo installation [1], but from my point of view can
>> happen when the install target is called with -j >1, libtool seems to fall 
>> back
>> to the system libraries if the library in the install location is not 
>> available
>> (which is always the case if you install into an empty dir set with DESTDIR).
>> Currently it just works for a non-parallel install because the dependencies 
>> in
>> Makefile.def are created in the right order.
>>
>> Ok for the trunk?
> 
> This is OK with a ChangeLog entry.

committed, with the ChangeLog entry from the original mail.

  Matthias



Re: [google 4.7] fdo build for linux kernel (issue 6968046)

2012-12-20 Thread Rong Xu
On Wed, Dec 19, 2012 at 5:22 PM, Rong Xu  wrote:
> On Wed, Dec 19, 2012 at 5:04 PM,   wrote:
>> The change in gcov-io.h is from a different patch.
>
> sorry. here is the patch for gcov-io.h:
>
> Index: gcov-io.h
> ===
> --- gcov-io.h   (revision 194562)
> +++ gcov-io.h   (working copy)
> @@ -781,8 +781,8 @@
>   unused) */
>
>unsigned n_functions;/* number of functions */
> -  const struct gcov_fn_info *const *functions; /* pointer to pointers
> - to function information  */
> +  const struct gcov_fn_info **functions; /* pointer to pointers
> +   to function information  */
>  };
>
>  /* Information about a single imported module.  */
> @@ -988,8 +988,7 @@
>  GCOV_LINKAGE void gcov_seek (gcov_position_t /*position*/) ATTRIBUTE_HIDDEN;
>  GCOV_LINKAGE void gcov_truncate (void) ATTRIBUTE_HIDDEN;
>  GCOV_LINKAGE gcov_unsigned_t gcov_string_length (const char *)
> ATTRIBUTE_HIDDEN;
> -GCOV_LINKAGE unsigned gcov_gcda_file_size (struct gcov_info *,
> -   struct gcov_summary *);
> +GCOV_LINKAGE unsigned gcov_gcda_file_size (struct gcov_info *);
>  #else
>  /* Available outside libgcov */
>  GCOV_LINKAGE void gcov_sync (gcov_position_t /*base*/,
>
>>
>> David
>>
>>
>> https://codereview.appspot.com/6968046/diff/1/gcc/gcov-io.c
>> File gcc/gcov-io.c (right):
>>
>> https://codereview.appspot.com/6968046/diff/1/gcc/gcov-io.c#newcode688
>> gcc/gcov-io.c:688:
>> Have you compared this with this impl:
>>
>> while (x)
>> {
>>c++;
>>x&=(x-1)
>> }
>> return c;
>>
>
> I did not try this simpler version. I can do a test on the dump speed
> and report back.

This simpler version is about 2% slower in dumping the profiles (average
of 10 dumps). But this is not a big deal.
I'll use this version.

-Rong

>
>> https://codereview.appspot.com/6968046/


Re: [fortran, patch] Allow displaying backtraces from user code

2012-12-20 Thread Janus Weil
 Attached is a new patch, which expands the documentation according to
 your proposal, and uses the name BACKTRACE. I hope that both Janne and
 Tobias can agree with this naming decision ...
>>>
>>> Looks fine from my side.
>>
>> Great, thanks. Janne?
>
> Yes, Ok for trunk.

Thanks again to both of you. Committed as r194648.

Cheers,
Janus


>>> Can you also add a quip to
>>> http://gcc.gnu.org/wiki/GFortran#GCC4.8 ?
>>
>> Sure, as soon as the patch is committed ...
>>
>> Cheers,
>> Janus
>
>
>
> --
> Janne Blomqvist


Re: [google 4.7] atomic update of profile counters (issue6965050)

2012-12-20 Thread Rong Xu
we have this patch primarily for getting valid profile counts. we
observe that for some high-threaded programs, we are getting poor
counter due to data racing of counter update (like counter value is
only 15% of what it supposed to be for a 10-thread program).

In general, enabling atomic updates slows down programs. (for some of
my toy programs, it has 3x slow down.) And that is the reason I use
options to control value and edge profile count.

-Rong

On Thu, Dec 20, 2012 at 8:57 AM, Andrew Pinski  wrote:
> On Thu, Dec 20, 2012 at 8:20 AM, Jan Hubicka  wrote:
>>> On Wed, Dec 19, 2012 at 4:29 PM, Andrew Pinski  wrote:
>>> >
>>> > On Wed, Dec 19, 2012 at 12:08 PM, Rong Xu  wrote:
>>> > > Hi,
>>> > >
>>> > > This patch adds the support of atomically updating the profile counters.
>>> > > Tested with google internal benchmarks and fdo kernel build.
>>> >
>>> > I think you should use the __atomic_ functions instead of __sync_
>>> > functions as they allow better performance for simple counters as you
>>> > can use __ATOMIC_RELAXED.
>>>
>>> You are right. I think __ATOMIC_RELAXED should be OK here.
>>> Thanks for the suggestion.
>>>
>>> >
>>> > And this would be useful for the trunk also.  I was going to implement
>>> > this exact thing this week but some other important stuff came up.
>>>
>>> I'll post trunk patch later.
>>
>> Yes, I like that patch, too. Even if the costs are quite high (and this is 
>> why
>> atomic updates was sort of voted down in the past) the alternative of using 
>> TLS
>> has problems with too-much per-thread memory.
>
> Actually sometimes (on some processors) atomic increments are cheaper
> than doing a regular incremental.  Mainly because there is an
> instruction which can handle it in the L2 cache rather than populating
> the L1.   Octeon is one such processor where this is true.
>
> Thanks,
> Andrew Pinski
>
>>
>> While there are even more alternatives, like recording the changes and
>> committing them in blocks (say at function return), I guess some solution is
>> better than no solution.
>>
>> Thanks,
>> Honza


Re: [google 4.7] atomic update of profile counters (issue6965050)

2012-12-20 Thread Andrew Pinski
On Thu, Dec 20, 2012 at 11:35 AM, Rong Xu  wrote:
> we have this patch primarily for getting valid profile counts. we
> observe that for some high-threaded programs, we are getting poor
> counter due to data racing of counter update (like counter value is
> only 15% of what it supposed to be for a 10-thread program).

I have seen much worse on Octeon running with 32-threaded program.  I
think it was only 1% of what it should have been.


>
> In general, enabling atomic updates slows down programs. (for my some
> of my toy programs, it has 3x slow down.) And that the reason I use
> options to control value and edge profile count.

I think on Octeon, the atomic updates would be a speedup because of
the atomic instruction which was added explicitly for incrementing a
statistics counter.  Internally at Cavium, I might just turn this on
by default as it even helps the one thread case :).

Thanks,
Andrew Pinski

>
> -Rong
>
> On Thu, Dec 20, 2012 at 8:57 AM, Andrew Pinski  wrote:
>> On Thu, Dec 20, 2012 at 8:20 AM, Jan Hubicka  wrote:
 On Wed, Dec 19, 2012 at 4:29 PM, Andrew Pinski  wrote:
 >
 > On Wed, Dec 19, 2012 at 12:08 PM, Rong Xu  wrote:
 > > Hi,
 > >
 > > This patch adds the supprot of atomic update the profile counters.
 > > Tested with google internal benchmarks and fdo kernel build.
 >
 > I think you should use the __atomic_ functions instead of __sync_
 > functions as they allow better performance for simple counters as you
 > can use __ATOMIC_RELAXED.

 You are right. I think __ATOMIC_RELAXED should be OK here.
 Thanks for the suggestion.

 >
 > And this would be useful for the trunk also.  I was going to implement
 > this exact thing this week but some other important stuff came up.

 I'll post trunk patch later.
>>>
>>> Yes, I like that patch, too. Even if the costs are quite high (and this is 
>>> why
>>> atomic updates was sort of voted down in the past) the alternative of using 
>>> TLS
>>> has problems with too-much per-thread memory.
>>
>> Actually sometimes (on some processors) atomic increments are cheaper
>> than doing a regular incremental.  Mainly because there is an
>> instruction which can handle it in the L2 cache rather than populating
>> the L1.   Octeon is one such processor where this is true.
>>
>> Thanks,
>> Andrew Pinski
>>
>>>
>>> While there are even more alternatives, like recording the changes and
>>> commmiting them in blocks (say at function return), I guess some solution is
>>> better than no solution.
>>>
>>> Thanks,
>>> Honza


Re: [google 4.7] fdo build for linux kernel (issue 6968046)

2012-12-20 Thread Xinliang David Li
It  depends on the value distribution .

David

On Thu, Dec 20, 2012 at 11:30 AM, Rong Xu  wrote:
> On Wed, Dec 19, 2012 at 5:22 PM, Rong Xu  wrote:
>> On Wed, Dec 19, 2012 at 5:04 PM,   wrote:
>>> The change in gcov-io.h is from a different patch.
>>
>> sorry. here is the patch for gcov-io.h:
>>
>> Index: gcov-io.h
>> ===
>> --- gcov-io.h   (revision 194562)
>> +++ gcov-io.h   (working copy)
>> @@ -781,8 +781,8 @@
>>   unused) */
>>
>>unsigned n_functions;/* number of functions */
>> -  const struct gcov_fn_info *const *functions; /* pointer to pointers
>> - to function information  */
>> +  const struct gcov_fn_info **functions; /* pointer to pointers
>> +   to function information  */
>>  };
>>
>>  /* Information about a single imported module.  */
>> @@ -988,8 +988,7 @@
>>  GCOV_LINKAGE void gcov_seek (gcov_position_t /*position*/) ATTRIBUTE_HIDDEN;
>>  GCOV_LINKAGE void gcov_truncate (void) ATTRIBUTE_HIDDEN;
>>  GCOV_LINKAGE gcov_unsigned_t gcov_string_length (const char *)
>> ATTRIBUTE_HIDDEN;
>> -GCOV_LINKAGE unsigned gcov_gcda_file_size (struct gcov_info *,
>> -   struct gcov_summary *);
>> +GCOV_LINKAGE unsigned gcov_gcda_file_size (struct gcov_info *);
>>  #else
>>  /* Available outside libgcov */
>>  GCOV_LINKAGE void gcov_sync (gcov_position_t /*base*/,
>>
>>>
>>> David
>>>
>>>
>>> https://codereview.appspot.com/6968046/diff/1/gcc/gcov-io.c
>>> File gcc/gcov-io.c (right):
>>>
>>> https://codereview.appspot.com/6968046/diff/1/gcc/gcov-io.c#newcode688
>>> gcc/gcov-io.c:688:
>>> Have you compared this with this impl:
>>>
>>> while (x)
>>> {
>>>c++;
>>>x&=(x-1)
>>> }
>>> return c;
>>>
>>
>> I did not try this pimplier version. I can do a test on the dump speed
>> and report back.
>
> This simpler version is about 2% slow in dumping the profiles (average
> of 10 dumps). But this is not a big deal.
> I'll use this this version.
>
> -Rong
>
>>
>>> https://codereview.appspot.com/6968046/


[patch] std::unique_ptr improvements

2012-12-20 Thread Jonathan Wakely
This patch started when I noticed that it's not possible to construct
a shared_ptr from unique_ptr, then I discovered we don't
use D::pointer if it exists, and there were a number of other
non-conformance issues with our std::unique_ptr.  I ended up
fixing them by implementing Geoffrey's proposed resolution for LWG
issue 2118, which isn't official yet but is better than what we had
before so is a step in the right direction, even if it ends up needing
further revision when 2118 is resolved.

* include/std/functional (_Require): Move to ...
* include/std/type_traits (_Require): ... here.
* include/bits/shared_ptr_base.h (__shared_count::_S_create_from_up):
Handle unique_ptr for arrays or with custom pointer types.
(__shared_ptr::__shared_ptr(unique_ptr<_Tp1, _Del>&&): Likewise.
* include/bits/unique_ptr.h (unique_ptr<_Tp[], _Dp>): Use
_Dp::pointer if defined. Implement proposed resolution of LWG 2118.
* testsuite/20_util/shared_ptr/cons/unique_ptr_array.cc: New.
* testsuite/20_util/unique_ptr/assign/cv_qual.cc: New.
* testsuite/20_util/unique_ptr/cons/array_convertible_neg.cc: New.
* testsuite/20_util/unique_ptr/cons/convertible_neg.cc: New.
* testsuite/20_util/unique_ptr/cons/cv_qual.cc: New.
* testsuite/20_util/unique_ptr/modifiers/cv_qual.cc: New.
* testsuite/20_util/unique_ptr/requirements/pointer_type_array.cc: New.
* testsuite/20_util/shared_ptr/cons/unique_ptr.cc: Adjust comments.
* testsuite/20_util/unique_ptr/cons/pointer_array_convertible_neg.cc:
Likewise.
* testsuite/20_util/unique_ptr/requirements/pointer_type.cc: Likewise.
* testsuite/20_util/bind/ref_neg.cc: Adjust dg-error line number.
* testsuite/20_util/declval/requirements/1_neg.cc: Likewise.
* testsuite/20_util/default_delete/48631_neg.cc: Likewise.
* testsuite/20_util/shared_ptr/cons/43820_neg.cc: Likewise.
* testsuite/20_util/unique_ptr/assign/48635_neg.cc: Likewise.
* testsuite/20_util/unique_ptr/modifiers/reset_neg.cc: Adjust
dg-error text.
* testsuite/20_util/unique_ptr/cons/ptr_deleter_neg.cc: Use
different instantiations so static_assert fails for each.

Thanks to Geoffrey and Lawrence for input and test cases.

Tested x86_64-linux, committed to trunk.
commit 907290c8077e6757c56fc64c9160c4bdaea86b90
Author: Jonathan Wakely 
Date:   Thu Dec 20 17:57:33 2012 +

* include/std/functional (_Require): Move to ...
* include/std/type_traits (_Require): ... here.
* include/bits/shared_ptr_base.h (__shared_count::_S_create_from_up):
Handle unique_ptr for arrays or with custom pointer types.
(__shared_ptr::__shared_ptr(unique_ptr<_Tp1, _Del>&&): Likewise.
* include/bits/unique_ptr.h (unique_ptr<_Tp[], _Dp>): Use
_Dp::pointer if defined. Implement proposed resolution of LWG 2118.
* testsuite/20_util/shared_ptr/cons/unique_ptr_array.cc: New.
* testsuite/20_util/unique_ptr/assign/cv_qual.cc: New.
* testsuite/20_util/unique_ptr/cons/array_convertible_neg.cc: New.
* testsuite/20_util/unique_ptr/cons/convertible_neg.cc: New.
* testsuite/20_util/unique_ptr/cons/cv_qual.cc: New.
* testsuite/20_util/unique_ptr/modifiers/cv_qual.cc: New.
* testsuite/20_util/unique_ptr/requirements/pointer_type_array.cc: New.
* testsuite/20_util/shared_ptr/cons/unique_ptr.cc: Adjust comments.
* testsuite/20_util/unique_ptr/cons/pointer_array_convertible_neg.cc:
Likewise.
* testsuite/20_util/unique_ptr/requirements/pointer_type.cc: Likewise.
* testsuite/20_util/bind/ref_neg.cc: Adjust dg-error line number.
* testsuite/20_util/declval/requirements/1_neg.cc: Likewise.
* testsuite/20_util/default_delete/48631_neg.cc: Likewise.
* testsuite/20_util/shared_ptr/cons/43820_neg.cc: Likewise.
* testsuite/20_util/unique_ptr/assign/48635_neg.cc: Likewise.
* testsuite/20_util/unique_ptr/modifiers/reset_neg.cc: Adjust
dg-error text.
* testsuite/20_util/unique_ptr/cons/ptr_deleter_neg.cc: Use
different instantiations so static_assert fails for each.

diff --git a/libstdc++-v3/include/bits/shared_ptr_base.h 
b/libstdc++-v3/include/bits/shared_ptr_base.h
index ead3728..9d9fecb 100644
--- a/libstdc++-v3/include/bits/shared_ptr_base.h
+++ b/libstdc++-v3/include/bits/shared_ptr_base.h
@@ -616,7 +616,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_S_create_from_up(std::unique_ptr<_Tp, _Del>&& __r,
  typename std::enable_if::value>::type* = 0)
{
- return new _Sp_counted_deleter<_Tp*, _Del, std::allocator,
+ typedef typename unique_ptr<_Tp, _Del>::pointer _Ptr;
+ return new _Sp_counted_deleter<_Ptr, _Del, std::allocator,
_Lp>(__r.get(), __r.get_deleter());
}
 
@@ -625,9 +626,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VE

Re: [google 4.7] fdo build for linux kernel (issue 6968046)

2012-12-20 Thread Rong Xu
that's right. but there is no way to predict the pattern.
what I meant was as far as it does not introduce major slow-down in
dumping profile, I'd like to use the simpler version.
what do you think?

-Rong

On Thu, Dec 20, 2012 at 11:54 AM, Xinliang David Li  wrote:
> It  depends on the value distribution .
>
> David
>
> On Thu, Dec 20, 2012 at 11:30 AM, Rong Xu  wrote:
>> On Wed, Dec 19, 2012 at 5:22 PM, Rong Xu  wrote:
>>> On Wed, Dec 19, 2012 at 5:04 PM,   wrote:
 The change in gcov-io.h is from a different patch.
>>>
>>> sorry. here is the patch for gcov-io.h:
>>>
>>> Index: gcov-io.h
>>> ===
>>> --- gcov-io.h   (revision 194562)
>>> +++ gcov-io.h   (working copy)
>>> @@ -781,8 +781,8 @@
>>>   unused) */
>>>
>>>unsigned n_functions;/* number of functions */
>>> -  const struct gcov_fn_info *const *functions; /* pointer to pointers
>>> - to function information  
>>> */
>>> +  const struct gcov_fn_info **functions; /* pointer to pointers
>>> +   to function information  */
>>>  };
>>>
>>>  /* Information about a single imported module.  */
>>> @@ -988,8 +988,7 @@
>>>  GCOV_LINKAGE void gcov_seek (gcov_position_t /*position*/) 
>>> ATTRIBUTE_HIDDEN;
>>>  GCOV_LINKAGE void gcov_truncate (void) ATTRIBUTE_HIDDEN;
>>>  GCOV_LINKAGE gcov_unsigned_t gcov_string_length (const char *)
>>> ATTRIBUTE_HIDDEN;
>>> -GCOV_LINKAGE unsigned gcov_gcda_file_size (struct gcov_info *,
>>> -   struct gcov_summary *);
>>> +GCOV_LINKAGE unsigned gcov_gcda_file_size (struct gcov_info *);
>>>  #else
>>>  /* Available outside libgcov */
>>>  GCOV_LINKAGE void gcov_sync (gcov_position_t /*base*/,
>>>

 David


 https://codereview.appspot.com/6968046/diff/1/gcc/gcov-io.c
 File gcc/gcov-io.c (right):

 https://codereview.appspot.com/6968046/diff/1/gcc/gcov-io.c#newcode688
 gcc/gcov-io.c:688:
 Have you compared this with this impl:

 while (x)
 {
c++;
x&=(x-1)
 }
 return c;

>>>
>>> I did not try this pimplier version. I can do a test on the dump speed
>>> and report back.
>>
>> This simpler version is about 2% slow in dumping the profiles (average
>> of 10 dumps). But this is not a big deal.
>> I'll use this this version.
>>
>> -Rong
>>
>>>
 https://codereview.appspot.com/6968046/


Re: [patch] fix install dependencies for target libraries

2012-12-20 Thread Tobias Burnus

Am 20.12.2012 19:22, schrieb Matthias Klose:

This was seen with the libgo installation [1], but from my point of view can
happen when the install target is called with -j >1, libtool seems to fall back
to the system libraries if the library in the install location is not available
(which is always the case if you install into an empty dir set with DESTDIR).
Currently it just works for a non-parallel install because the dependencies in
Makefile.def are created in the right order.

Ok for the trunk?


For the Fortran change: Can you test with --disable-libquadmath 
--disable-libquadmath-support? Will that work by silently ignoring the 
libquadmath dependence or will it break?



+dependencies = { module=install-target-libgfortran; 
on=install-target-libquadmath; };


Tobias


Re: [Patch, fortran] PR55763 - Issues with some simpler CLASS(*) programs

2012-12-20 Thread Tobias Burnus

Paul Richard Thomas wrote:

Thanks to Tobias for coming up so quickly with class(*) bugs!


That was simple: I could mine Reinhold Bader's collection. Only the 
ICE-on-invalid part of the test case is mine. Please credit him in/for 
the test case.



Bootstrapped and regtested on FC17/x86_64 - OK for trunk?


OK. Thanks for the quick patch. Hopefully, fixing the remaining issues 
of that PR will be as quick.


(Can you update the TODO list in the other CLASS(*) PR - or is it complete?)

Tobias


Re: [wwwdocs,Java] Obsolete GCJ FAQ entry for Solaris?

2012-12-20 Thread Gerald Pfeifer
PING.

On Fri, 2 Nov 2012, Gerald Pfeifer wrote:
> Rainer (or others),
> 
> the FAQ entry below seems obsolete to me (dates back more than a
> decade).  Shall we remove it, or is there something else we still
> should document (in addition to gcc/doc/install.texi)?
> 
> Gerald
> 
> Index: faq.html
> ===
> RCS file: /cvs/gcc/wwwdocs/htdocs/java/faq.html,v
> retrieving revision 1.69
> diff -u -3 -p -r1.69 faq.html
> --- faq.html  2 Nov 2012 19:59:34 -   1.69
> +++ faq.html  2 Nov 2012 20:29:12 -
> @@ -36,7 +36,6 @@
>
>  I need something more recent than the last 
> release; how
>should I build it?
> -Linker bug on Solaris
>
>  
>  Gcj Compile/Link Questions 
> @@ -278,33 +277,6 @@ $ gij HelloWorld
>  
>
>  
> -  
> -  3.2 Linker bug on Solaris
> -   
> -
> -  There is a known problem with the  href="http://gcc.gnu.org/ml/gcc-bugs/1999-10/msg00159.html";> 
> -  native Solaris linker when using gcc/gcj. A good indication 
> you've 
> -  run into this problem is if you get an error that looks like the 
> following 
> -  when building libgcj: 
> -  
> -ld: warning: option -o appears more than once, first setting taken
> -ld: fatal: file libfoo.so: cannot open file: No such file or directory
> -ld: fatal: File processing errors. No output written to .libs/libfoo.so
> -collect2: ld returned 1 exit status
> -  
> -  A known workaround for this and other reported link problems on 
> the 
> -  various releases of Solaris is to build gcc/gcj with the  href="ftp://sources.redhat.com/pub/binutils/snapshots";> 
> -  latest GNU binutils instead of the native Solaris ld. 
> The 
> -  most straightforward way to do this is to build and install 
> binutils, 
> -  and then reference it in the configure for gcc via 
> --with-ld=/path_to_binutils_install/bin/ld 
> -  (--with-as may also be similarly specified but is not 
> believed 
> -  to be required).
> -  
> -  Please note, gcc/gcj must be built using GNU ld prior to doing a 
> -  clean build of libgcj! 
> -
> -  
> -
>Gcj Compile/Link Questions
>   
>4.1 Why do I get undefined reference to 
> `main' 


fix libquadmath build regression

2012-12-20 Thread Alexandre Oliva
Revision 193063 brought in calls to feraiseexcept() into libquadmath,
which caused a build regression on Fedora 16 (BLAG 160k actually) x86_64
while building an i686-linux-gnu native toolchain.

The problem is that glibc has an extern inline definition of
feraiseexcept that is introduced by including fenv.h (it's in
bits/fenv.h), and this definition requires SSE support regardless of
target arch or word width, so it doesn't work for an i686 native that
doesn't assume SSE registers and instructions are available.

This bug is fixed in newer versions of glibc, but I figured it wouldn't
hurt to have a work-around in place for libquadmath to build, detecting
that the extern inline in the header is broken and introducing a wrapper
that bypasses the header so as to use the out-of-line definition in the
math library.

Is this ok to install?

Deal with SSE-requiring extern inline in bits/fenv.h

From: Alexandre Oliva 

for  libquadmath/ChangeLog

	* configure.ac: Check that calling feraiseexcept compiles when
	fenv.h is included.  Define QUADMATH_FERAISEEXCEPT cpp macro
	and LIBQUAD_FERAISEEXCEPT conditional otherwise.
	* Makefile.am (libquadmath_la_SOURCES): Add
	math/feraiseexcept.c, conditional on LIBQUAD_FERAISEEXCEPT.
	* aclocal.m4: Rebuilt.
	* configure: Rebuilt.
	* config.h.in: Rebuilt.
	* Makefile.in: Rebuilt.
	* math/feraiseexcept.c: New file.
	* math/quadmath-imp.h (__quadmath_feraiseexcept): Declare.
	* math/ccoshq.c: Use QUADMATH_FERAISEEXCEPT macro to call, and
	to decide whether to call, feraiseexcept.
	* math/cexpq.c: Likewise.
	* math/csinhq.c: Likewise.
	* math/csinq.c: Likewise.
	* math/ctanhq.c: Likewise.
	* math/ctanq.c: Likewise.
	* math/ilogbq.c: Likewise.  Include fenv.h if HAVE_FENV_H.
---

 libquadmath/Makefile.am  |3 +
 libquadmath/Makefile.in  |  200 +++---
 libquadmath/aclocal.m4   |   74 +-
 libquadmath/config.h.in  |3 +
 libquadmath/configure|   58 +++
 libquadmath/configure.ac |   21 
 libquadmath/math/ccoshq.c|8 +-
 libquadmath/math/cexpq.c |   12 +-
 libquadmath/math/csinhq.c|   12 +-
 libquadmath/math/csinq.c |   12 +-
 libquadmath/math/ctanhq.c|4 -
 libquadmath/math/ctanq.c |4 -
 libquadmath/math/feraiseexcept.c |9 ++
 libquadmath/math/ilogbq.c|   16 ++-
 libquadmath/quadmath-imp.h   |5 +
 15 files changed, 318 insertions(+), 123 deletions(-)
 create mode 100644 libquadmath/math/feraiseexcept.c


diff --git a/libquadmath/Makefile.am b/libquadmath/Makefile.am
index 6c97ee8..9acf619 100644
--- a/libquadmath/Makefile.am
+++ b/libquadmath/Makefile.am
@@ -69,6 +69,9 @@ libquadmath_la_SOURCES = \
   printf/quadmath-printf.c printf/rshift.c printf/submul_1.c printf/sub_n.c \
   strtod/strtoflt128.c strtod/mpn2flt128.c strtod/tens_in_limb.c
 
+if LIBQUAD_FERAISEEXCEPT
+libquadmath_la_SOURCES += math/feraiseexcept.c
+endif
 
 # Work around what appears to be a GNU make bug handling MAKEFLAGS
 # values defined in terms of make variables, as is the case for CC and
diff --git a/libquadmath/configure.ac b/libquadmath/configure.ac
index c547da8..765dfea 100644
--- a/libquadmath/configure.ac
+++ b/libquadmath/configure.ac
@@ -150,6 +150,27 @@ else
   fi
 fi
 
+if test "x$ac_cv_header_fenv_h" = "xyes"; then
+  dnl Some versions of libc 2.16 for x86_64 have an extern inline
+  dnl definition of feraiseexcept in bits/fenv.h that requires SSE
+  dnl support, and they fail to compile with -m32 when targeting
+  dnl pre-x86_64 32-bit architectures.
+
+  dnl This wrapper enables us to bypass the inline definition and call
+  dnl the out-of-line feraiseexcept definition, because it does not
+  dnl include fenv.h itself.
+
+  AC_CACHE_CHECK([whether feraiseexcept is broken in fenv.h], [quadmath_cv_feraiseexcept_fenv_broken], [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include ]], [feraiseexcept (FE_INVALID);])], [quadmath_cv_feraiseexcept_fenv_broken=no], [quadmath_cv_feraiseexcept_fenv_broken=yes])])
+  if test "x$quadmath_cv_feraiseexcept_fenv_broken" = "xyes"; then
+feraiseexcept=__quadmath_feraiseexcept
+  else
+feraiseexcept=feraiseexcept
+  fi
+  AC_DEFINE_UNQUOTED([QUADMATH_FERAISEEXCEPT], [$feraiseexcept],
+		 [Optional replacement for compile-time broken feraiseexcept.])
+fi
+AM_CONDITIONAL([LIBQUAD_FERAISEEXCEPT], [test "x$ac_cv_header_fenv_h$quadmath_cv_feraiseexcept_fenv_broken" = "xyesyes"])
+
 # Check for hidden visibility (copied from libssp).
 saved_CFLAGS="$CFLAGS"
 CFLAGS="$CFLAGS -Werror"
diff --git a/libquadmath/math/ccoshq.c b/libquadmath/math/ccoshq.c
index 8d55ad3..c1b24ac 100644
--- a/libquadmath/math/ccoshq.c
+++ b/libquadmath/math/ccoshq.c
@@ -89,9 +89,9 @@ ccoshq (__complex128 x)
 	  __imag__ retval = __real__ x == 0.0Q ? 0.0Q : nanq ("");
 	  __real__ retval = nanq ("") + nanq ("");
 
-#ifdef HAVE_FENV_H
+#ifdef QUADMATH_FERAISEEXCEPT
 	  if (icls == QUADFP_INFINITE)
-	f

[PR libmudflap/53359] don't register symbols not emitted

2012-12-20 Thread Alexandre Oliva
libmudflap emits a global initializer that registers memory ranges for
global data symbols.  However, even if IPA decides not to emit a symbol
because it's unused, we'd still emit registration sequences for them in
some cases, which, in the PR testcase, would result in TOC references to
the undefined symbols.

This patch fixes the problem, avoiding registration for symbols that are
not present in the varpool.

Regstrapped on x86_64-linux-gnu and i686-linux-gnu; I've also verified
that it removes the TOC references on a ppc64-linux-gnu cross.

Ok to install?

don't let mudflap register global symbols that won't be emitted

From: Alexandre Oliva 

for  gcc/ChangeLog

	PR libmudflap/53359
	* tree-mudflap.c (mudflap_finish_file): Skip deferred decls
	not found in the symtab.
---

 gcc/tree-mudflap.c |4 
 1 files changed, 4 insertions(+), 0 deletions(-)


diff --git a/gcc/tree-mudflap.c b/gcc/tree-mudflap.c
index 90d0448..a9caaf2 100644
--- a/gcc/tree-mudflap.c
+++ b/gcc/tree-mudflap.c
@@ -1335,6 +1335,10 @@ mudflap_finish_file (void)
   if (! TREE_PUBLIC (obj) && ! TREE_ADDRESSABLE (obj))
 continue;
 
+	  /* If we're not emitting the symbol, don't register it.  */
+	  if (!symtab_get_node (obj))
+	continue;
+
   if (! COMPLETE_TYPE_P (TREE_TYPE (obj)))
 {
   warning (OPT_Wmudflap,


-- 
Alexandre Oliva, freedom fighterhttp://FSFLA.org/~lxoliva/
You must be the change you wish to see in the world. -- Gandhi
Be Free! -- http://FSFLA.org/   FSF Latin America board member
Free Software Evangelist  Red Hat Brazil Compiler Engineer


atomic update of profile counters (issue7000044)

2012-12-20 Thread Rong Xu
Hi,

This patch adds support of atomic update of profile counters. The goal is to 
improve
the poor counter values for highly threaded programs. 

The atomic update is under a new option -fprofile-gen-atomic=
N=0: default, no atomic update
N=1: atomic update edge counters.
N=2: atomic update some of value profile counters (currently indirect-call and 
one value profile).
N=3: both edge counter and the above value profile counters.
Other value: fall back to the default.

This patch is a simple porting of the version in google-4_7 branch. It uses 
__atomic_fetch_add
based on Andrew Pinski's suggestion. Note I did not apply it to all the value 
profiles as
the indirect-call profile is the most relevant one here.

Test with bootstrap.

Comments and suggestions are welcomed.

Thanks,

-Rong


2012-12-20  Rong Xu  

* libgcc/libgcov.c (__gcov_one_value_profiler_body_atomic): New
function. Atomic update profile counters.
(__gcov_one_value_profiler_atomic): Ditto.
(__gcov_indirect_call_profiler_atomic): Ditto.
* gcc/gcov-io.h: Macros for atomic update.
* gcc/common.opt: New option.
* gcc/tree-profile.c (gimple_init_edge_profiler): Atomic
update profile counters.
(gimple_gen_edge_profiler): Ditto.

Index: libgcc/libgcov.c
===
--- libgcc/libgcov.c(revision 194652)
+++ libgcc/libgcov.c(working copy)
@@ -1113,12 +1113,35 @@ __gcov_one_value_profiler_body (gcov_type *counter
   counters[2]++;
 }
 
+/* Atomic update version of __gcov_one_value_profile_body().  */
+static inline void 
+__gcov_one_value_profiler_body_atomic (gcov_type *counters, gcov_type value)
+{
+  if (value == counters[0])
+GCOV_TYPE_ATOMIC_FETCH_ADD_FN (&counters[1], 1, MEMMODEL_RELAXED);
+  else if (counters[1] == 0)
+{
+  counters[1] = 1; 
+  counters[0] = value;
+}
+  else 
+GCOV_TYPE_ATOMIC_FETCH_ADD_FN (&counters[1], -1, MEMMODEL_RELAXED);
+  GCOV_TYPE_ATOMIC_FETCH_ADD_FN (&counters[2], 1, MEMMODEL_RELAXED);
+}
+
 #ifdef L_gcov_one_value_profiler
 void
 __gcov_one_value_profiler (gcov_type *counters, gcov_type value)
 {
   __gcov_one_value_profiler_body (counters, value);
 }
+
+void
+__gcov_one_value_profiler_atomic (gcov_type *counters, gcov_type value)
+{
+  __gcov_one_value_profiler_body_atomic (counters, value);
+}
+
 #endif
 
 #ifdef L_gcov_indirect_call_profiler
@@ -1153,6 +1176,17 @@ __gcov_indirect_call_profiler (gcov_type* counter,
  && *(void **) cur_func == *(void **) callee_func))
 __gcov_one_value_profiler_body (counter, value);
 }
+
+/* Atomic update version of __gcov_indirect_call_profiler().  */
+void
+__gcov_indirect_call_profiler_atomic (gcov_type* counter, gcov_type value,
+  void* cur_func, void* callee_func)
+{
+  if (cur_func == callee_func
+  || (VTABLE_USES_DESCRIPTORS && callee_func
+  && *(void **) cur_func == *(void **) callee_func))
+__gcov_one_value_profiler_body_atomic (counter, value);
+}
 #endif
 
 
Index: gcc/gcov-io.h
===
--- gcc/gcov-io.h   (revision 194652)
+++ gcc/gcov-io.h   (working copy)
@@ -202,7 +202,15 @@ typedef unsigned gcov_type_unsigned __attribute__
 #endif
 #endif
 
+#if LONG_LONG_TYPE_SIZE > 32
+#define GCOV_TYPE_ATOMIC_FETCH_ADD_FN __atomic_fetch_add_8
+#define GCOV_TYPE_ATOMIC_FETCH_ADD BUILT_IN_ATOMIC_FETCH_ADD_8
+#else
+#define GCOV_TYPE_ATOMIC_FETCH_ADD_FN __atomic_fetch_add_4
+#define GCOV_TYPE_ATOMIC_FETCH_ADD BUILT_IN_ATOMIC_FETCH_ADD_4
+#endif
 
+
 #if defined (TARGET_POSIX_IO)
 #define GCOV_LOCKED 1
 #else
@@ -212,6 +220,18 @@ typedef unsigned gcov_type_unsigned __attribute__
 #else /* !IN_LIBGCOV */
 /* About the host */
 
+#if LONG_LONG_TYPE_SIZE > 32
+#define GCOV_TYPE_ATOMIC_FETCH_ADD_FN __atomic_fetch_add_8
+#define GCOV_TYPE_ATOMIC_FETCH_ADD BUILT_IN_ATOMIC_FETCH_ADD_8
+#else
+#define GCOV_TYPE_ATOMIC_FETCH_ADD_FN __atomic_fetch_add_4
+#define GCOV_TYPE_ATOMIC_FETCH_ADD BUILT_IN_ATOMIC_FETCH_ADD_4
+#endif
+#define PROFILE_GEN_EDGE_ATOMIC (flag_profile_gen_atomic == 1 || \
+ flag_profile_gen_atomic == 3)
+#define PROFILE_GEN_VALUE_ATOMIC (flag_profile_gen_atomic == 2 || \
+  flag_profile_gen_atomic == 3)
+
 typedef unsigned gcov_unsigned_t;
 typedef unsigned gcov_position_t;
 /* gcov_type is typedef'd elsewhere for the compiler */
Index: gcc/common.opt
===
--- gcc/common.opt  (revision 194652)
+++ gcc/common.opt  (working copy)
@@ -1635,6 +1635,15 @@ fprofile-correction
 Common Report Var(flag_profile_correction)
 Enable correction of flow inconsistent profile data input
 
+; fprofile-gen-atomic=0: disable atomic update.
+; fprofile-gen-atomic=1: atomically update edge profile counters.
+; fprofile-gen-atomic=2: atomically update value profil

Re: [PATCH i386]: Enable push/pop in pro/epilogue for modern CPUs

2012-12-20 Thread Xinliang David Li
Ahmad has helped doing some atom performance testing (ChromeOS
benchmarks) with this patch. In summary, there is no statistically
significant regression seen. There is one improvement of about +1.9%
(v8 benchmark) which looks real.

David

On Wed, Dec 12, 2012 at 9:24 AM, Xinliang David Li  wrote:
> On Wed, Dec 12, 2012 at 8:37 AM, Jan Hubicka  wrote:
>>> I noticed in prologue/epilogue, GCC prefers to use MOVs followed by a
>>> SP adjustment instead of a sequence of pushes/pops. The preference to
>>> the MOVs are good for old CPU micro-architectures (before pentium-4,
>>> K10), because it breaks the data dependency.  In modern
>>> micro-architecture, push/pop is implemented using a mechanism called
>>> stack engine. The data dependency is removed by the hardware, and
>>> push/pop becomes very cheap (1 uOp, 1 cycle latency), and they are
>>> smaller. There is no longer the need to avoid using them.   This is
>>> also what ICC does.
>>>
>>> The following patch fixed the problem. It passes bootstrap/regression
>>> test. OK to install?
>>>
>>> thanks,
>>>
>>> David
>>>
>>> Index: config/i386/i386.c
>>> ===
>>> --- config/i386/i386.c (revision 194324)
>>> +++ config/i386/i386.c (working copy)
>>> @@ -1919,10 +1919,10 @@ static unsigned int initial_ix86_tune_fe
>>>m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
>>>
>>>/* X86_TUNE_PROLOGUE_USING_MOVE */
>>> -  m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
>>> +  m_PPRO | m_ATHLON_K8,
>>>
>>>/* X86_TUNE_EPILOGUE_USING_MOVE */
>>> -  m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,
>>> +  m_PPRO | m_ATHLON_K8,
>>
>> Push/pops wrt moves was always difficult to tune on old CPUs, so I am happy 
>> it
>> is gone from generic (in fact I had similar patch pending).
>> Are you sure about Atom having stack engine, too?
>>
>
> Good question. The instruction latency table
> (http://www.agner.org/optimize/instruction_tables.pdf) shows that for
> Atom: push r has one 1uop, 1 cycle latency. However the instruction is
> not pairable which will affect ILP. The guide here
> http://www.agner.org/optimize/microarchitecture.pdf does not mention
> Atom has stack engine either.
>
> I will help collect some performance data on Atom.
>
>
> thanks,
>
> David
>
>
>> Related thing is accumulate_outgoing_args. Igor is testing it on Core and I 
>> will
>> give it a try on K10.
>>
>> Honza
>>
>> I am attaching the changes for core costs I made if someone is interested in
>> testing them.  If we can declare P4/PPro and maybe K8 chips obsolete for
>> generic, there is room for improvement in generic, too. Like using inc/dec
>> again.
>>
>> Honza
>>
>> Index: config/i386/i386.c
>> ===
>> --- config/i386/i386.c  (revision 194452)
>> +++ config/i386/i386.c  (working copy)
>> @@ -1620,14 +1620,14 @@ struct processor_costs core_cost = {
>>COSTS_N_INSNS (8),   /* cost of FABS instruction.  */
>>COSTS_N_INSNS (8),   /* cost of FCHS instruction.  */
>>COSTS_N_INSNS (40),  /* cost of FSQRT instruction.  */
>> -  {{libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
>> -   {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
>> +  {{libcall, {{8192, rep_prefix_4_byte, true}, {-1, libcall, false}}},
>> +   {libcall, {{24, loop, true}, {8192, rep_prefix_8_byte, true},
>>{-1, libcall, false,
>>{{libcall, {{6, loop_1_byte, true},
>>{24, loop, true},
>>{8192, rep_prefix_4_byte, true},
>>{-1, libcall, false}}},
>> -   {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
>> +   {libcall, {{24, loop, true}, {8192, rep_prefix_8_byte, true},
>>{-1, libcall, false,
>>1,   /* scalar_stmt_cost.  */
>>1,   /* scalar load_cost.  */
>> @@ -1806,7 +1806,7 @@ static unsigned int initial_ix86_tune_fe
>>m_PPRO,
>>
>>/* X86_TUNE_PARTIAL_FLAG_REG_STALL */
>> -  m_CORE2I7 | m_GENERIC,
>> +  m_GENERIC | m_CORE2,
>>
>>/* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
>> * on 16-bit immediate moves into memory on Core2 and Corei7.  */
>> @@ -1822,7 +1822,7 @@ static unsigned int initial_ix86_tune_fe
>>m_K6,
>>
>>/* X86_TUNE_USE_CLTD */
>> -  ~(m_PENT | m_ATOM | m_K6),
>> +  ~(m_PENT | m_ATOM | m_K6 | m_GENERIC),
>>
>>/* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
>>m_PENT4,
>> @@ -1901,7 +1901,7 @@ static unsigned int initial_ix86_tune_fe
>>m_COREI7 | m_BDVER,
>>
>>/* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
>> -  m_BDVER ,
>> +  m_BDVER,
>>
>>/* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and 
>> dependencies
>>   are resolved on SSE register parts instead of whole registers, so we 
>> may
>> @@ -1910,10

Re: [Patch, PR 54128] ira.c change to fix mips bootstrap

2012-12-20 Thread Jakub Jelinek
On Fri, Aug 31, 2012 at 10:58:51AM -0700, Steve Ellcey  wrote:
> Here is my patch to fix the bootstrap comparison failure (PR 54128) on
> MIPS.  The reason for the comparison failure was a difference in
> register usage and I tracked it down to build_insn_chain which checked
> all instructions for register usage in order to set the dead_or_set
> and live_relevant_regs bitmaps instead of checking only non-debug
> instructions.  Changing INSN_P to NONDEBUG_INSN_P in build_insn_chain
> allowed me to bootstrap and caused no regressions.
> 
> OK to checkin?

Given Alex' comments in the PR, the second hunk is definitely ok for trunk,
the first one can be applied too (but you can skip it too if you want, it
shouldn't make a difference).

> 2012-08-31  Steve Ellcey  
> 
>   PR bootstrap/54128
>   * ira.c (build_insn_chain): Check only NONDEBUG instructions for
>   register usage.
> 
> diff --git a/gcc/ira.c b/gcc/ira.c
> index 3825498..477c87b 100644
> --- a/gcc/ira.c
> +++ b/gcc/ira.c
> @@ -3341,7 +3341,7 @@ build_insn_chain (void)
> c->insn = insn;
> c->block = bb->index;
>  
> -   if (INSN_P (insn))
> +   if (NONDEBUG_INSN_P (insn))
>   for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
> {
>   df_ref def = *def_rec;
> @@ -3432,7 +3432,7 @@ build_insn_chain (void)
> bitmap_and_compl_into (live_relevant_regs, elim_regset);
> bitmap_copy (&c->live_throughout, live_relevant_regs);
>  
> -   if (INSN_P (insn))
> +   if (NONDEBUG_INSN_P (insn))
>   for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++)
> {
>   df_ref use = *use_rec;

Jakub