[AARCH64] AArch64 backport to 4.7

2012-09-12 Thread Sofiane Naci
[AARCH64] AArch64 backport to 4.7.

This series of patches implements a back port of the AArch64 backend
to gcc-4.7.

This patch series is not intended to be applied directly to the
gcc-4_7-branch branch.  We have pushed a new branch into SVN to
host this back port, located at:

 svn://gcc.gnu.org/gcc/branches/ARM/aarch64-4.7-branch

This branch will track 4.7.  Patches to this branch should be tagged
"[AARCH64-4.7]" and must be approved by ARM personnel.

The branch was constructed using the following steps:

. Copied gcc-4_7-branch.
. Copied the AArch64 port from ARM/aarch64-branch.
. Applied the attached AArch64 backport patch.
. Applied the attached backport patch to implement integer iterators.
. Updated config.sub and config.guess from upstream gnuconfig.


Thank you


aarch64-backport.patch
Description: Binary data


aarch64-int-iterators-backport.patch
Description: Binary data


[SH] Correct address cost estimations

2012-09-12 Thread Oleg Endo
Hello,

This corrects the address cost estimations for SH.
Tested on rev 191161 with 
make -k check RUNTESTFLAGS="--target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"

and no new failures.
With this applied CSiBE shows a total size decrease of 4668 bytes for
'-O2 -m4-single -ml -mpretend-cmove'.

OK?

Cheers,
Oleg

ChangeLog:

* config/sh/sh.c (sh_rtx_costs): Add handling of MEM, 
SIGN_EXTEND, ZERO_EXTEND and PARALLEL cases.
(sh_address_cost): Correct rtx parsing and tweak cost 
estimations.
Index: gcc/config/sh/sh.c
===
--- gcc/config/sh/sh.c	(revision 191161)
+++ gcc/config/sh/sh.c	(working copy)
@@ -3196,6 +3196,78 @@
 }
   return false;
 
+/* The cost of a mem access is mainly the cost of the address mode.  */
+case MEM:
+  *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
+true);
+  return true;
+
+/* The cost of a sign or zero extend depends on whether the source is a
+   reg or a mem.  In case of a mem take the address into acount.  */
+case SIGN_EXTEND:
+  if (REG_P (XEXP (x, 0)))
+	{
+	  *total = COSTS_N_INSNS (1);
+	  return true;
+	}
+  if (MEM_P (XEXP (x, 0)))
+	{
+	  *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
+GET_MODE (XEXP (x, 0)),
+MEM_ADDR_SPACE (XEXP (x, 0)), true);
+	  return true;
+	}
+  return false;
+
+case ZERO_EXTEND:
+  if (REG_P (XEXP (x, 0)))
+	{
+	  *total = COSTS_N_INSNS (1);
+	  return true;
+	}
+  else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
+	   && (GET_MODE (XEXP (x, 0)) == QImode
+		   || GET_MODE (XEXP (x, 0)) == HImode))
+	{
+	  /* Handle SH2A's movu.b and movu.w insn.  */
+	  *total = sh_address_cost (XEXP (XEXP (x, 0), 0), 
+GET_MODE (XEXP (x, 0)), 
+MEM_ADDR_SPACE (XEXP (x, 0)), true);
+	  return true;
+	}
+  return false;
+
+/* mems for SFmode and DFmode can be inside a parallel due to
+   the way the fpscr is handled.  */
+case PARALLEL:
+  for (int i = 0; i < XVECLEN (x, 0); i++)
+	{
+	  rtx xx = XVECEXP (x, 0, i);
+	  if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
+	{
+	  *total = sh_address_cost (XEXP (XEXP (xx, 0), 0), 
+	GET_MODE (XEXP (xx, 0)),
+	MEM_ADDR_SPACE (XEXP (xx, 0)), true);
+	  return true;
+	}
+	  if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
+	{
+	  *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
+	GET_MODE (XEXP (xx, 1)),
+	MEM_ADDR_SPACE (XEXP (xx, 1)), true);
+	  return true;
+	}
+	}
+
+  if (sh_1el_vec (x, VOIDmode))
+	*total = outer_code != SET;
+  else if (sh_rep_vec (x, VOIDmode))
+	*total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
+		  + (outer_code != SET));
+  else
+	*total = COSTS_N_INSNS (3) + (outer_code != SET);
+  return true;
+
 case CONST_INT:
   if (TARGET_SHMEDIA)
 {
@@ -3271,7 +3343,10 @@
   else
 *total = 10;
   return true;
+
 case CONST_VECTOR:
+/* FIXME: This looks broken.  Only the last statement has any effect.
+   Probably this could be folded with the PARALLEL case?  */
   if (x == CONST0_RTX (GET_MODE (x)))
 	*total = 0;
   else if (sh_1el_vec (x, VOIDmode))
@@ -3339,15 +3414,6 @@
   *total = COSTS_N_INSNS (20);
   return true;
 
-case PARALLEL:
-  if (sh_1el_vec (x, VOIDmode))
-	*total = outer_code != SET;
-  if (sh_rep_vec (x, VOIDmode))
-	*total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
-		  + (outer_code != SET));
-  *total = COSTS_N_INSNS (3) + (outer_code != SET);
-  return true;
-
 case FLOAT:
 case FIX:
   *total = 100;
@@ -3430,36 +3496,47 @@
 /* Compute the cost of an address.  */
 
 static int
-sh_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
+sh_address_cost (rtx x, enum machine_mode mode,
 		 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
 {
+  /* Simple reg, post-inc, pre-dec addressing.  */
+  if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
+return 1;
+
   /* 'reg + disp' addressing.  */
-  if (satisfies_constraint_Sdd (x))
+  if (GET_CODE (x) == PLUS
+  && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
 {
-  const HOST_WIDE_INT offset = disp_addr_displacement (x);
-  const enum machine_mode mode = GET_MODE (x);
+  const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
 
-  /* The displacement would fit into a 2 byte move insn.  */
+  if (offset == 0)
+	return 1;
+
+  /* The displacement would fit into a 2 byte move insn.
+	 HImode and QImode loads/stores with displacement put pressure on
+	 R0 which will most likely require another reg copy.  Thus account
+	 a higher cost for that.  */
   if (offset > 0 && offset <= max_mov_insn_displacement (mode, false))
-	return 0;
+	return (mode == HImode || mode == QImode) ? 2 : 1;
 
   /* The displacement would fit into a 4 

Re: Obsolete picochip-* in 4.7.2+

2012-09-12 Thread Richard Guenther
On Tue, Sep 11, 2012 at 5:21 PM, Jakub Jelinek  wrote:
> Hi!
>
> As discussed on IRC, the picochip-* port doesn't have an active maintainer
> anymore, this patch adds it to deprecated ports for 4.7.2+ so that it can be 
> removed in
> GCC 4.8 unless somebody steps up to maintain it.
>
> Ok for trunk/4.7?

Ok.

Thanks,
Richard.

> 2012-09-11  Jakub Jelinek  
>
> * config.gcc: Obsolete picochip-*.
>
> --- gcc/config.gcc  2012-09-05 14:52:14.428548941 +0200
> +++ gcc/config.gcc  2012-09-11 17:05:15.147522191 +0200
> @@ -245,7 +245,8 @@ md_file=
>
>  # Obsolete configurations.
>  case ${target} in
> -   score-* \
> +   picochip-*  \
> + | score-* \
>   )
>  if test "x$enable_obsolete" != xyes; then
>echo "*** Configuration ${target} is obsolete." >&2
>
> --- gcc-4.7/changes.html10 Aug 2012 16:25:46 -  1.124
> +++ gcc-4.7/changes.html11 Sep 2012 15:15:38 -
> @@ -29,7 +29,14 @@
>  next release of GCC will have their sources permanently
>  removed.
>
> -The following ports for individual systems on
> +All GCC ports for the following processor
> +architectures have been declared obsolete:
> +
> +
> + picoChip (picochip-*)
> +
> +
> +The following ports for individual systems on
>  particular architectures have been obsoleted:
>
>  
>
>
> Jakub


Re: [PATCH,mmix] convert to constraints.md

2012-09-12 Thread Nathan Froyd
- Original Message -
> Nathan, again thanks.  There are a few minor tweaks compared to your
> version:

Thanks for fixing this up!

> - Keeping old layout of "mmix_reg_or_8bit_operand".  That looked like
>   a spurious change and I prefer the ior construct to the
>   if_then_else.

ISTR without this change, there were lots of assembly changes like:

set rx, 6
cmp rz, ry, rx

instead of the previous and better:

cmp rz, ry, 6

(apologies if the assembly syntax isn't correct; hopefully the intent is clear)

but if you double-checked that the assembly didn't change after your changes, 
maybe something else that you tweaked fixed this.

> - Replacing undefined-constraint-"H" with "I" instead of removing it.
>   It was either renamed early or a genuine typo.  Good catch.

Hard not to see it; the gen* machinery complains about undefined constraints. :)

-Nathan


Re: [C++ Patch] Remove uses of ATTRIBUTE_UNUSED in the function parameters

2012-09-12 Thread Richard Guenther
On Tue, Sep 11, 2012 at 5:37 PM, Jakub Jelinek  wrote:
> On Tue, Sep 11, 2012 at 05:29:12PM +0200, Paolo Carlini wrote:
>> PS: slightly interesting, in a couple of cases -
>> write_unnamed_type_name, wrap_cleanups_r - the parameters were
>> actually used.
>
> Just a general comment, often an argument is only conditionally used,
> e.g. depending on some preprocessor macro (e.g. target hook).  In that
> case unnamed parameter is not an option, but dropping ATTRIBUTE_UNUSED is
> not desirable either.

And note that we have ARG_UNUSED for parameters - to cope with older
compilers not handling attributes here too well (I run into this when using
gcc 3.3 as host compiler).

Richard.

> Jakub


Re: Backtrace library [3/3]

2012-09-12 Thread Florian Weimer

On 09/12/2012 12:55 AM, Ian Lance Taylor wrote:

I have finished the initial implementation of the backtrace library I
proposed at http://gcc.gnu.org/ml/gcc/2012-08/msg00317.html .  I've
separated the work into three patches.  These patches only implement the
backtrace library itself; actual use of the library will follow in
separate patches.


I'm trying to add a few comments below.  I hope Thunderbird does not 
garble them too much.


+backtrace_open (const char *filename, backtrace_error_callback 
error_callback,

+   void *data)
+{
+  int descriptor;
+
+  descriptor = open (filename, O_RDONLY | O_CLOEXEC);
+  if (descriptor < 0)
+{
+  error_callback (data, filename, errno);
+  return -1;
+}
+  if (O_CLOEXEC == 0)
+{
+  /* It doesn't matter if this fails for some reason.  */
+  fcntl (descriptor, F_SETFD, FD_CLOEXEC);
+}

You should call fcntl unconditionally.  O_CLOEXEC might be non-zero 
during build, but could still be ignored by the kernel.


+static void
+fileline_initialize (backtrace_error_callback error_callback, void *data)
+{
...
+  if (executable_name != NULL)
+descriptor = backtrace_open (executable_name, error_callback, data);
+  else
+descriptor = backtrace_open ("/proc/self/exe", error_callback, data);

You should try getauxval(AT_EXECFN) as well (needs recent glibc), so 
that this works with a mounted /proc.


This library should only be used when getauxval(AT_SECURE) is zero, so 
that the program doesn't try to read files with elevated privileges to 
which the original user wouldn't have access.  I don't think this has to 
be addressed within the library itself.


Adding /usr/lib/debug support shouldn't be too hard, I will try to 
figure out the required path transformations (which are somewhat 
system-specific).


--
Florian Weimer / Red Hat Product Security Team


Re: [PATCH] Combine location with block using block_locations

2012-09-12 Thread Richard Guenther
On Tue, Sep 11, 2012 at 5:32 PM, Michael Matz  wrote:
> Hi,
>
> On Tue, 11 Sep 2012, Dehao Chen wrote:
>
>> Looks like we have two choices:
>>
>> 1. Stream out block info, and use LTO_SET_PREVAIL for TREE_CHAIN(t)
>
> This will actually not work correctly in some cases.  The problem is, if
> the prevailing decl is already part of another chain (say in another
> block_var list) you would break the current chain.  Hence block vars need
> special handling in the lto streamer (another reason why tree_chain is not
> the most clever think to use for this chain).  This problem area needs to
> be solved somehow if block info is to be preserved correctly.

Well.  The issue is that at present we stream BLOCKs in the function section
via its DECL_INTIAL.  Which means we _never_ should get a non-prevailing
DECL in BLOCK_VARS.  You need to debug why that doesn't work anymore.
Possibly the BLOCK leaks into decls it should not leak to via the location
mechanism?

>> 2. Don't stream out block info for LTO, and still call LTO_NO_PREVAIL
>> (TREE_CHAIN (t)).
>
> That's also a large hammer as it basically will mean no debug info after
> LTO :-/ Sigh, at this point I have no good solution that doesn't involve
> quite some work, perhaps your hack is good enough for the time being,
> though I hate it :)

It hides a bug.  If we replace anything in BLOCK_VARS then the risk is
that you generate an infinite chain in some BLOCK_VARS list and thus
get infinite loops somewhere in the compiler.

So, no, that's not an option.

Richard.

>
> Ciao,
> Michael.


Re: [PATCH] Combine location with block using block_locations

2012-09-12 Thread Richard Guenther
On Wed, Sep 12, 2012 at 7:06 AM, Dehao Chen  wrote:
> Now I think we are facing a more complex problem. The data structure
> we use to store the location_adhoc_data are file-static in linemap.c
> in libcpp. These data structures are not guarded by GTY(()).
> Meanwhile, as we have removed the block data structure from
> gimple.gsbase as well as tree.exp (encoding them into an location_t).
> This could cause block being GCed and the LOCATION_BLOCK becoming
> dangling pointers.

Uh.  Note that it is quite important that we are able to garbage-collect unused
BLOCKs, this is the whole point of removing unused BLOCK scopes in
remove_unused_locals.  So this indeed becomes much more complicated ...
What would be desired is that the garbage collector can NULL an entry in
the mapping table when it is not referenced in any other way (that other
reference would be the BLOCK tree as stored in a FUNCTION_DECLs DECL_INITIAL).

> I tried to manipulate GTY to make it recognize the LOCATION_BLOCK from
> gimple.gsbase.location. However, neigher nested_ptr nor mark_hook can
> help me.
>
> Another approach would be guard the location_adhoc_data and related
> data structures in GTY(()). However, this is non-trivial because tree
> is not visible in libcpp. At the same time, my implementation heavily
> relies on hashtable to make the code efficient, thus it's quite tricky
> to make "param_is" and "use_params" work.
>
> The final approach, which I'll try tomorrow, would be move all my
> implementation from libcpp to gcc, and guard them with GTY(()). I
> still haven't thought of any potential problem of this approach. Any
> comments?

I think moving the mapping to GC in a lazy manner as I described above
would be the way to go.  For hashtables GC already supports if_marked,
not sure if similar support is available for arrays/vecs.

Richard.

> Thanks,
> Dehao
>
> On Tue, Sep 11, 2012 at 9:00 AM, Dehao Chen  wrote:
>> I saw comments in tree-streamer-out.c:
>>
>>   /* Do not stream BLOCK_SOURCE_LOCATION.  We cannot handle debug information
>>  for early inlining so drop it on the floor instead of ICEing in
>>  dwarf2out.c.  */
>>   streamer_write_chain (ob, BLOCK_VARS (expr), ref_p);
>>
>> However, what the code is doing seemed contradictory with the comment.
>> Or am I missing something?
>>
>>
>>
>> On Tue, Sep 11, 2012 at 8:32 AM, Michael Matz  wrote:
>>> Hi,
>>>
>>> On Tue, 11 Sep 2012, Dehao Chen wrote:
>>>
 Looks like we have two choices:

 1. Stream out block info, and use LTO_SET_PREVAIL for TREE_CHAIN(t)
>>>
>>> This will actually not work correctly in some cases.  The problem is, if
>>> the prevailing decl is already part of another chain (say in another
>>> block_var list) you would break the current chain.  Hence block vars need
>>> special handling in the lto streamer (another reason why tree_chain is not
>>> the most clever think to use for this chain).  This problem area needs to
>>> be solved somehow if block info is to be preserved correctly.
>>>
 2. Don't stream out block info for LTO, and still call LTO_NO_PREVAIL
 (TREE_CHAIN (t)).
>>>
>>> That's also a large hammer as it basically will mean no debug info after
>>> LTO :-/ Sigh, at this point I have no good solution that doesn't involve
>>> quite some work, perhaps your hack is good enough for the time being,
>>> though I hate it :)
>>
>> I got it. Then I'll keep the patch as it is (remove the
>> LTO_NO_PREVAIL), and work with Honza to resolve the issue he had, and
>> then we should be good to check in?
>>
>> Thanks,
>> Dehao
>>
>>>
>>>
>>> Ciao,
>>> Michael.


[v3, build] Clear hardware capabilities on libstdc++.so with Sun as

2012-09-12 Thread Rainer Orth
Since the use of rdrand was introduced in src/c++11/random.cc, all
execution tests involving libstdc++.so.6 fail on Solaris 10 and 11/x86
with a sufficiently recent native assembler that supports rdrand: either
Solaris 10/x86 patch 119961-11 or Solaris 11.1 builds (haven't checked
which one).  The problem is that as tags src/c++11/random.o as needing
RDRAND support:

% file random.o
random.o:   ELF 32-bit LSB relocatable 80386 Version 1 [CMOV RDRAND]

which is propagated to libstdc++.so.6 and causes the runtime linker to
fail the execution if the hardware doesn't support that hardware
capability, although rdrand is executed only if the cpuid indicates the
support is present.

The usual solution so far has been to clear the hardware capability
using a linker map (as in libitm, cf. libitm/clearcap.map).
Unfortunately, this doesn't work here: as can be seen with elfdump,
RDRAND is set in a second mask (CA_SUNW_HW_2) since all bits in
CA_SUNW_HW_1 are already used:

% elfdump -H random.o

Capabilities Section:  .SUNW_cap

 Object Capabilities:
 index  tag   value
   [0]  CA_SUNW_HW_1 0x20  [ CMOV ]
   [1]  CA_SUNW_HW_2 0x1  [ RDRAND ]

The old (v1) linker map syntax has no support for clearing that
bit/mask, and while the v2 map syntax does, we cannot universally assume
it's present on Solaris 10: while recent linker patches include it,
older ones don't and ld and as can be updated independently.

So I've settled for a different solution instead: Sun assemblers with
hardware capability support also have a -nH switch to suppress their
generation, thus I'm testing for that and use it if possible.

This is exactly what the following patch does.

Bootstrapped without regressions on i386-pc-solaris2.1[01] with affected
versions of Sun as and gas 2.22.  Results with gas are unchanged, with
Sun as all failures are gone.  x86_64-unknown-linux-gnu bootstrap is
running to make sure nothing breaks there.

Ok for mainline if that passes?

Rainer


2012-09-11  Rainer Orth  

* acinclude.m4 (GLIBCXX_CHECK_ASSEMBLER_HWCAP): Define.
* configure.ac: Call GLIBCXX_CHECK_ASSEMBLER_HWCAP.
* fragment.am (CONFIG_CXXFLAGS): Add $(HWCAP_FLAGS).
* configure: Regenerate.
* Makefile.in: Regenerate.
* doc/Makefile.in: Regenerate.
* include/Makefile.in: Regenerate.
* libsupc++/Makefile.in: Regenerate.
* po/Makefile.in: Regenerate.
* python/Makefile.in: Regenerate.
* src/Makefile.in: Regenerate.
* src/c++11/Makefile.in: Regenerate.
* src/c++98/Makefile.in: Regenerate.
* testsuite/Makefile.in: Regenerate.

# HG changeset patch
# Parent cc6aab46be72c37bfdfccd786ed5c332a7ce4cd9
Clear hardware capabilities on libstdc++.so with Sun as

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -169,6 +169,32 @@ AC_DEFUN([GLIBCXX_CHECK_COMPILER_FEATURE
 
 
 dnl
+dnl Check if the assembler used supports disabling generation of hardware
+dnl capabilities.  This is only supported by Sun as at the moment.
+dnl
+dnl Defines:
+dnl  HWCAP_FLAGS='-Wa,-nH' if possible.
+dnl
+AC_DEFUN([GLIBCXX_CHECK_ASSEMBLER_HWCAP], [
+  test -z "$HWCAP_FLAGS" && HWCAP_FLAGS=''
+
+  ac_save_CFLAGS="$CFLAGS"
+  CFLAGS="$CFLAGS -Wa,-nH"
+
+  AC_MSG_CHECKING([for as that supports -Wa,-nH])
+  AC_TRY_COMPILE([], [return 0;], [ac_hwcap_flags=yes],[ac_hwcap_flags=no])
+  if test "$ac_hwcap_flags" = "yes"; then
+HWCAP_FLAGS="-Wa,-nH $HWCAP_FLAGS"
+  fi
+  AC_MSG_RESULT($ac_hwcap_flags)
+
+  CFLAGS="$ac_save_CFLAGS"
+
+  AC_SUBST(HWCAP_FLAGS)
+])
+
+
+dnl
 dnl If GNU ld is in use, check to see if tricky linker opts can be used.  If
 dnl the native linker is in use, all variables will be defined to something
 dnl safe (like an empty string).
diff --git a/libstdc++-v3/configure.ac b/libstdc++-v3/configure.ac
--- a/libstdc++-v3/configure.ac
+++ b/libstdc++-v3/configure.ac
@@ -333,6 +333,9 @@ case "$target" in
 esac
 GLIBCXX_CONDITIONAL(GLIBCXX_LDBL_COMPAT, test $ac_ldbl_compat = yes)
 
+# Check if assembler supports disabling hardware capability support.
+GLIBCXX_CHECK_ASSEMBLER_HWCAP
+
 # Check if assembler supports rdrand opcode.
 GLIBCXX_CHECK_X86_RDRAND
 
diff --git a/libstdc++-v3/fragment.am b/libstdc++-v3/fragment.am
--- a/libstdc++-v3/fragment.am
+++ b/libstdc++-v3/fragment.am
@@ -22,7 +22,7 @@ endif
 # These bits are all figured out from configure.  Look in acinclude.m4
 # or configure.ac to see how they are set.  See GLIBCXX_EXPORT_FLAGS.
 CONFIG_CXXFLAGS = \
-	$(SECTION_FLAGS) $(EXTRA_CXX_FLAGS) -frandom-seed=$@
+	$(SECTION_FLAGS) $(HWCAP_FLAGS) $(EXTRA_CXX_FLAGS) -frandom-seed=$@
 WARN_CXXFLAGS = \
 	$(WARN_FLAGS) $(WERROR_FLAG) -fdiagnostics-show-location=once 
 

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [Fortran, Patch] PR 54225 - Fix ice-on-invalid-code with "*" in array refs

2012-09-12 Thread Mikael Morin
On 11/09/2012 07:54, Tobias Burnus wrote:
> This patch fixes a GCC 4.7/4.8 regression for invalid code.
> 
> Build and regtested on x86-64-linux.
> OK for the trunk and 4.7?
> 

Yes.
PR 53306 is also fixed by your patch according to Dominique, so don't
forget to include its testcase and to add the PR reference in the
ChangeLogs.
Thanks

Mikael


Re: [C++ Patch] Remove uses of ATTRIBUTE_UNUSED in the function parameters

2012-09-12 Thread Paolo Carlini

On 09/12/2012 10:57 AM, Richard Guenther wrote:
And note that we have ARG_UNUSED for parameters - to cope with older 
compilers not handling attributes here too well (I run into this when 
using gcc 3.3 as host compiler).
Ah, thanks, I didn't know that (both the problem and the solution): it 
seems (another) good reason to just get rid of as many ATTRIBUTE_UNUSED 
as possible!


Paolo.


Re: [PATCH,mmix] convert to constraints.md

2012-09-12 Thread Hans-Peter Nilsson
On Wed, 12 Sep 2012, Nathan Froyd wrote:
> - Original Message -
> > Nathan, again thanks.  There are a few minor tweaks compared to your
> > version:
>
> Thanks for fixing this up!
>
> > - Keeping old layout of "mmix_reg_or_8bit_operand".  That looked like
> >   a spurious change and I prefer the ior construct to the
> >   if_then_else.
>
> ISTR without this change, there were lots of assembly changes like:
>
> set rx, 6
> cmp rz, ry, rx
>
> instead of the previous and better:
>
> cmp rz, ry, 6
>
> (apologies if the assembly syntax isn't correct; hopefully the intent is 
> clear)

Yes.  My only guess is a typo in your previous edit, as the two
constructs certainly should be equivalent in *what's* being
recognized.  If they aren't, we have a bug in the gen*
machinery.  If "my" construct is wrong, then that's a separate
bug-fix, ehm.  Seems worth it to double-check, not just for
the sake of MMIX.

> but if you double-checked that the assembly didn't change
> after your changes, maybe something else that you tweaked fixed this.

I have no clue; nothing in the patch below stands out - the
missing "I"-constraint that may explain this (modulo that "H"
wasn't recognizable either) was on an "or" operation, not a
compare.  But maybe one was close enough that it mattered.

What revision was your baseline?  My baseline was r190260 but
configure-patches as posted (required to build, affecting
crtstuff at most) and with:

I'll try with your original patch and see it I can spot
something.

Index: gcc/config/mmix/predicates.md
===
--- gcc/config/mmix/predicates.md   (revision 190682)
+++ gcc/config/mmix/predicates.md   (working copy)
@@ -118,7 +118,7 @@ (define_predicate "mmix_symbolic_or_addr
return 1;
   /* Fall through.  */
 default:
-  return address_operand (op, mode);
+  return mmix_extra_constraint (op, 'U', reload_in_progress || 
reload_completed);
 }
 })

Index: gcc/config/mmix/mmix.md
===
--- gcc/config/mmix/mmix.md (revision 190682)
+++ gcc/config/mmix/mmix.md (working copy)
@@ -274,7 +275,7 @@ (define_insn "anddi3"
 (define_insn "iordi3"
   [(set (match_operand:DI 0 "register_operand" "=r,r")
(ior:DI (match_operand:DI 1 "register_operand" "%r,0")
-   (match_operand:DI 2 "mmix_reg_or_constant_operand" "rH,LS")))]
+   (match_operand:DI 2 "mmix_reg_or_constant_operand" "rI,LS")))]
   ""
   "@
OR %0,%1,%2

> > - Replacing undefined-constraint-"H" with "I" instead of removing it.
> >   It was either renamed early or a genuine typo.  Good catch.
>
> Hard not to see it; the gen* machinery complains about undefined constraints. 
> :)

Exactly! :)  You don't congratulate the machine, but the
machinist - and sometimes the inventor of the machine.
(Thank you, Zack!)

brgds, H-P


[PATCH] Fix PR54553

2012-09-12 Thread Richard Guenther

This makes sure -finline is saved across optimize attribute changes
(-O0 changes it).

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2012-09-12  Richard Guenther  

PR middle-end/54553
* common.opt (finline): Mark with Optimization.

Index: gcc/common.opt
===
--- gcc/common.opt  (revision 191209)
+++ gcc/common.opt  (working copy)
@@ -1289,7 +1289,7 @@ Perform indirect inlining
 ; General flag to enable inlining.  Specifying -fno-inline will disable
 ; all inlining apart from always-inline functions.
 finline
-Common Report Var(flag_no_inline,0) Init(0)
+Common Report Var(flag_no_inline,0) Init(0) Optimization
 Enable inlining of function declared \"inline\", disabling disables all 
inlining
 
 finline-small-functions


Re: [PATCH] Add option for dumping to stderr (issue6190057)

2012-09-12 Thread Richard Guenther
On Wed, Sep 12, 2012 at 12:30 PM, Richard Guenther
 wrote:
> On Wed, Sep 12, 2012 at 10:12 AM, Sharad Singhai  wrote:
>> Thanks for your comments. Please see my responses inline.
>>
>> On Tue, Sep 11, 2012 at 1:16 PM, Xinliang David Li  
>> wrote:
>>> Can you resend your patch in text form (also need to resolve the
>>> latest conflicts) so that it can be commented inline?
>>
>> I tried to include inline patch earlier but my message was bounced
>> back from patches mailing list. I am trying it again.
>>
>>> Please also provide as summary a more up-to-date description of
>>> 1) Command line option syntax and semantics
>>
>> I added some documentation in the patch. Here are the relevant bits
>> from invoke.texi.
>>
>> `-fdump-tree-SWITCH-OPTIONS=FILENAME'
>>  Control the dumping at various stages of processing the
>>  intermediate language tree to a file.  The file name is generated
>>  by appending a switch-specific suffix to the source file name, and
>>  the file is created in the same directory as the output file. In
>>  case of `=FILENAME' option, the dump is output on the given file
>>  instead of the auto named dump files.
>>  ...
>>
>> `=FILENAME'
>>   Instead of an auto named dump file, output into the given file
>>   name. The file names `stdout' and `stderr' are treated
>>   specially and are considered already open standard streams.
>>   For example,
>>
>>gcc -O2 -ftree-vectorize -fdump-tree-vect-details=foo.dump
>> -fdump-tree-pre=stderr file.c
>>
>>   outputs vectorizer dump into `foo.dump', while the PRE dump
>>   is output on to `stderr'. If two conflicting dump filenames
>>   are given for the same pass, then the latter option
>>   overrides the earlier one.
>>
>> `-fopt-info-PASS'
>> `-fopt-info-PASS-OPTIONS'
>> `-fopt-info-PASS-OPTIONS=FILENAME'
>>  Controls optimization dumps from various passes. If the `-OPTIONS'
>>  form is used, OPTIONS is a list of `-' separated options which
>>  controls the details of the dump.  If OPTIONS is not specified, it
>>  defaults to `optimized'. If the FILENAME is not specified, it
>>  defaults to `stderr'. Note that the output FILENAME will be
>>  overwritten in case of multiple translation units. If a combined
>>  output from multiple the translation units is desired, `stderr'
>>  should be used instead.
>>
>>  The PASS could be one of the tree or rtl passes. The following
>>  options are available
>
> I don't like that we have -PASS here.  That makes it awfully similar
> to -fdump-PASS-OPTIONS=FILENAME.  Are we merely having
> -fopt-info because OPTIONS are "different"?
>
>> `optimized'
>>   Print information when a particular optimization is
>>   successfully applied. It is up to the pass to decide which
>>   information is relevant. For example, the vectorizer pass
>>   prints the location of loop which got vectorized.
>>
>> `missed'
>>   Print information about missed optimizations. Individual
>>   passes control which information to include in the output.
>>   For example,
>>
>>gcc -O2 -ftree-vectorize -fopt-info-tree-vect-missed
>
> At least for -PASS better names should be available.  And given the
> lack of pass support for the new scheme (apart from the vectorizer)
> we should enumerate the set of -PASS values we accept, currently
> -vec only.  IMHO it does not make sense to provide -fopt-info for
> the myriad of passes we have - usually only high-level ones are interesting
> to the user?
>
>>   will print information about missed vectorization
>>   opportunities on to stderr.
>>
>> `note'
>>   Print verbose information about optimizations, such as certain
>>   transformations, more detailed information about decisions
>>   etc.
>>
>> `details'
>>   Print detailed information from a particular pass. This
>>   includes OPTIMIZED, MISSED, and NOTE. For example,
>>
>>gcc -O2 -ftree-vectorize
>> -fopt-info-tree-vect-details=vect.details
>>
>>   outputs detailed optimization report from the vectorization
>>   pass into `vect.details'.
>
> Can options be chained?  like -fopt-info-vec-missed-note?
>
>> `-fopt-info-tree-all'
>> `-fopt-info-tree-all-OPTIONS'
>> `-fopt-info-tree-all-OPTIONS=FILENAME'
>>  This is similar to `-fopt-info' but instead of a single pass, it
>>  applies the dump options to all the tree passes. If the FILENAME
>>  is provided, the dump from all the passes is concatenated,
>>  otherwise the dump is output onto `stderr'. If OPTIONS is omitted,
>>  it defaults to `optimized'.
>>
>>   gcc -O3 -fopt-info-tree-all-optimized-missed=tree.optdump
>>
>>  This will output information about missed optimizations as well as
>>  optimized locations from all the tree pas

Unreviewed bootstrap patch

2012-09-12 Thread Rainer Orth
This patch

Fix Solaris 9/x86 bootstrap
http://gcc.gnu.org/ml/gcc-patches/2012-08/msg01494.html

has remained unreviewed for 3 weeks.  It is necessary to fix Solaris
9/x86 bootstrap after the cxx-conversion merge.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [v3, build] Clear hardware capabilities on libstdc++.so with Sun as

2012-09-12 Thread Paolo Bonzini
Il 12/09/2012 11:26, Rainer Orth ha scritto:
> Since the use of rdrand was introduced in src/c++11/random.cc, all
> execution tests involving libstdc++.so.6 fail on Solaris 10 and 11/x86
> with a sufficiently recent native assembler that supports rdrand: either
> Solaris 10/x86 patch 119961-11 or Solaris 11.1 builds (haven't checked
> which one).  The problem is that as tags src/c++11/random.o as needing
> RDRAND support:
> 
> % file random.o
> random.o:   ELF 32-bit LSB relocatable 80386 Version 1 [CMOV RDRAND]
> 
> which is propagated to libstdc++.so.6 and causes the runtime linker to
> fail the execution if the hardware doesn't support that hardware
> capability, although rdrand is executed only if the cpuid indicates the
> support is present.
> 
> The usual solution so far has been to clear the hardware capability
> using a linker map (as in libitm, cf. libitm/clearcap.map).
> Unfortunately, this doesn't work here: as can be seen with elfdump,
> RDRAND is set in a second mask (CA_SUNW_HW_2) since all bits in
> CA_SUNW_HW_1 are already used:
> 
> % elfdump -H random.o
> 
> Capabilities Section:  .SUNW_cap
> 
>  Object Capabilities:
>  index  tag   value
>[0]  CA_SUNW_HW_1 0x20  [ CMOV ]
>[1]  CA_SUNW_HW_2 0x1  [ RDRAND ]
> 
> The old (v1) linker map syntax has no support for clearing that
> bit/mask, and while the v2 map syntax does, we cannot universally assume
> it's present on Solaris 10: while recent linker patches include it,
> older ones don't and ld and as can be updated independently.
> 
> So I've settled for a different solution instead: Sun assemblers with
> hardware capability support also have a -nH switch to suppress their
> generation, thus I'm testing for that and use it if possible.
> 
> This is exactly what the following patch does.
> 
> Bootstrapped without regressions on i386-pc-solaris2.1[01] with affected
> versions of Sun as and gas 2.22.  Results with gas are unchanged, with
> Sun as all failures are gone.  x86_64-unknown-linux-gnu bootstrap is
> running to make sure nothing breaks there.
> 
> Ok for mainline if that passes?
> 
>   Rainer
> 
> 
> 2012-09-11  Rainer Orth  
> 
>   * acinclude.m4 (GLIBCXX_CHECK_ASSEMBLER_HWCAP): Define.
>   * configure.ac: Call GLIBCXX_CHECK_ASSEMBLER_HWCAP.
>   * fragment.am (CONFIG_CXXFLAGS): Add $(HWCAP_FLAGS).
>   * configure: Regenerate.
>   * Makefile.in: Regenerate.
>   * doc/Makefile.in: Regenerate.
>   * include/Makefile.in: Regenerate.
>   * libsupc++/Makefile.in: Regenerate.
>   * po/Makefile.in: Regenerate.
>   * python/Makefile.in: Regenerate.
>   * src/Makefile.in: Regenerate.
>   * src/c++11/Makefile.in: Regenerate.
>   * src/c++98/Makefile.in: Regenerate.
>   * testsuite/Makefile.in: Regenerate.
> 
> 
> 
> 

Ok.

Paolo


Re: shrink-wrapping duplicates BBs across partitions.

2012-09-12 Thread Christian Bruel
The problem stems from tree-ssa-tail-merge that breaks bb->count, The
CFG looks like

 2
   /  \
  /6
 5 (0) |
 | 3 <-
 |/   \   |
 |   7 (1)  8 -
 | /
  4 (1)

(in parenthesis the bb->count from gcov)

 2
   /  \
  /6
 /  |
 |  3 <--
 | / |  |
 5 (0)   8 --
 |
 |
  4 (1)

so 5 and 4 are now in different partitions, producing an assertion
because there is no EDGE_CROSSING between them.

I can see 3 solutions to this

1) merge the BB counts in tree-ssa-tail-merge.c, so 5 is in the same
partition that 4

2) don't tail-merge blocks that belong to different partitions.

3) add a EDGE_CROSSING flag on the edge between 4 and 5.

1) fixes the problem, so 5 and 4 are now in the same partition. The fix
is quite trivial, as with attached.

the other solution 2) is more conservative, and also fixes the problem.

I don't think 3) is necessary.

more ideas ?

thanks,

Christian


On 09/11/2012 06:21 PM, Jakub Jelinek wrote:
> On Tue, Sep 11, 2012 at 05:40:30PM +0200, Steven Bosscher wrote:
>> On Tue, Sep 11, 2012 at 5:31 PM, Christian Bruel  
>> wrote:
>>> Actually, the edge is fairly simple. I have
>>>
>>> BB5 (BB_COLD_PARTITION) -> BB10 (BB_HOT_PARTITION) -> EXIT
>>>
>>> and BB10 has no other incoming edges. and we are duplicating it.
>>
>> That is wrong, should never happen. Is there a test case to play with?
>> It'd be good to have a PR for this.
> 
> Isn't that the standard case when !HAVE_return ?  Then you can have only a
> single return through epilogue, and when the epilogue is in the hot
> partition, even if cold code is returning, it needs to jump to the epilogue.
> 
>   Jakub
> 
Index: tree-ssa-tail-merge.c
===
--- tree-ssa-tail-merge.c	(revision 191129)
+++ tree-ssa-tail-merge.c	(working copy)
@@ -1478,6 +1478,8 @@
 bb2->frequency = BB_FREQ_MAX;
   bb1->frequency = 0;
 
+  bb2->count += bb1->count;
+
   /* Do updates that use bb1, before deleting bb1.  */
   release_last_vdef (bb1);
   same_succ_flush_bb (bb1);


Re: Unreviewed bootstrap patch

2012-09-12 Thread Richard Guenther
On Wed, 12 Sep 2012, Rainer Orth wrote:

> This patch
> 
>   Fix Solaris 9/x86 bootstrap
> http://gcc.gnu.org/ml/gcc-patches/2012-08/msg01494.html
> 
> has remained unreviewed for 3 weeks.  It is necessary to fix Solaris
> 9/x86 bootstrap after the cxx-conversion merge.

Ok.

Thanks,
Richard.


Re: [PATCH] Combine location with block using block_locations

2012-09-12 Thread Michael Matz
Hi,

On Wed, 12 Sep 2012, Richard Guenther wrote:

> > This will actually not work correctly in some cases.  The problem is, 
> > if the prevailing decl is already part of another chain (say in 
> > another block_var list) you would break the current chain.  Hence 
> > block vars need special handling in the lto streamer (another reason 
> > why tree_chain is not the most clever think to use for this chain).  
> > This problem area needs to be solved somehow if block info is to be 
> > preserved correctly.
> 
> Well.  The issue is that at present we stream BLOCKs in the function 
> section via its DECL_INTIAL.  Which means we _never_ should get a 
> non-prevailing DECL in BLOCK_VARS.  You need to debug why that doesn't 
> work anymore.

The assert that triggers tests that there's no var_decl in TREE_CHAIN.  
It doesn't test that it's a prevailing decl.  So we could assert that 
instead of the current check.

> >> 2. Don't stream out block info for LTO, and still call LTO_NO_PREVAIL
> >> (TREE_CHAIN (t)).
> >
> > That's also a large hammer as it basically will mean no debug info after
> > LTO :-/ Sigh, at this point I have no good solution that doesn't involve
> > quite some work, perhaps your hack is good enough for the time being,
> > though I hate it :)
> 
> It hides a bug.  If we replace anything in BLOCK_VARS then the risk is
> that you generate an infinite chain in some BLOCK_VARS list and thus
> get infinite loops somewhere in the compiler.

That's what I said for using SET_PREVAIL.  But his hack would specifically 
not replace anything in TREE_CHAIN (and hence BLOCK_VARS), and indeed not 
check anything either.


Ciao,
Michael.


Re: Change double_int calls to new interface.

2012-09-12 Thread Mark Kettenis
> Date: Tue, 11 Sep 2012 17:03:39 -0700
> From: Ian Lance Taylor 
> 
> On Tue, Sep 11, 2012 at 3:12 PM, Lawrence Crowl  wrote:
> > On 9/11/12, Andreas Schwab  wrote:
> >> Mark Kettenis  writes:
> >>> In file included from ../../../src/gcc/gcc/mcf.c:47:0:
> >>> ../../../src/gcc/gcc/mcf.c: In function 'void dump_fixup_edge(FILE*,
> >>> fixup_graph_type*, fixup_edge_p)':
> >>> ../../../src/gcc/gcc/system.h:288:78: error: integer overflow in
> >>> expression [-Werror=overflow]
> >>
> >> This is PR54528.
> >
> > The expression itself looks correct.  I have not been able to
> > duplicate the problem on x86.  I am now waiting on access to the
> > compile farm for access to a hppa system.  Does anyone have more
> > specific information on the condition that generates the error?
> 
> I haven't tried, but I bet you can get it to happen if you build a
> 32-bit compiler--that is, build a compiler using -m32 so that the
> compiler itself runs 32 bit code.

I don't think that helps since I don't see the problem on OpenBSD/i386
(i386-unknown-openbsd5.2) whith a strictly 32-bit compiler.  As I
wrote earlier, I suspect the key factor is HOST_WIDE_INT being 32-bit,
which means 32-bit architectures that don't sey need_64bit_hwint in
config.gcc.  The fact that m68k is affected as well strengthens my
suspicion.  So I expect arm to show the problem as well.  But people
probably haven't noticed since they cross-compile.

Anyway the "diff" below seems to fix the issue.  Guess the replacement
doesn't work!  Should be a big enough clue for Lawrence to come up
with a proper fix.

Index: fold-const.c
===
--- fold-const.c(revision 191150)
+++ fold-const.c(working copy)
@@ -982,13 +982,15 @@
   break;
 
 case MINUS_EXPR:
-/* FIXME(crowl) Remove this code if the replacment works.
+  /* FIXME(crowl) Remove this code if the replacment works.  */
+#if 1
   neg_double (op2.low, op2.high, &res.low, &res.high);
   add_double (op1.low, op1.high, res.low, res.high,
  &res.low, &res.high);
   overflow = OVERFLOW_SUM_SIGN (res.high, op2.high, op1.high);
-*/
+#else
   res = op1.add_with_sign (-op2, false, &overflow);
+#endif
   break;
 
 case MULT_EXPR:




Re: [PATCH] Combine location with block using block_locations

2012-09-12 Thread Richard Guenther
On Wed, Sep 12, 2012 at 2:14 PM, Michael Matz  wrote:
> Hi,
>
> On Wed, 12 Sep 2012, Richard Guenther wrote:
>
>> > This will actually not work correctly in some cases.  The problem is,
>> > if the prevailing decl is already part of another chain (say in
>> > another block_var list) you would break the current chain.  Hence
>> > block vars need special handling in the lto streamer (another reason
>> > why tree_chain is not the most clever think to use for this chain).
>> > This problem area needs to be solved somehow if block info is to be
>> > preserved correctly.
>>
>> Well.  The issue is that at present we stream BLOCKs in the function
>> section via its DECL_INTIAL.  Which means we _never_ should get a
>> non-prevailing DECL in BLOCK_VARS.  You need to debug why that doesn't
>> work anymore.
>
> The assert that triggers tests that there's no var_decl in TREE_CHAIN.
> It doesn't test that it's a prevailing decl.  So we could assert that
> instead of the current check.
>
>> >> 2. Don't stream out block info for LTO, and still call LTO_NO_PREVAIL
>> >> (TREE_CHAIN (t)).
>> >
>> > That's also a large hammer as it basically will mean no debug info after
>> > LTO :-/ Sigh, at this point I have no good solution that doesn't involve
>> > quite some work, perhaps your hack is good enough for the time being,
>> > though I hate it :)
>>
>> It hides a bug.  If we replace anything in BLOCK_VARS then the risk is
>> that you generate an infinite chain in some BLOCK_VARS list and thus
>> get infinite loops somewhere in the compiler.
>
> That's what I said for using SET_PREVAIL.  But his hack would specifically
> not replace anything in TREE_CHAIN (and hence BLOCK_VARS), and indeed not
> check anything either.

Hm, but we shouldn't end up streaming any BLOCKs at this point (nor local
TYPE_DECLs).  Those are supposed to be in the local function sections only
where no fixup for prevailing decls happens.

Richard.

>
> Ciao,
> Michael.


Re: [PATCH] Combine location with block using block_locations

2012-09-12 Thread Michael Matz
Hi,

On Wed, 12 Sep 2012, Richard Guenther wrote:

> >> It hides a bug.  If we replace anything in BLOCK_VARS then the risk 
> >> is that you generate an infinite chain in some BLOCK_VARS list and 
> >> thus get infinite loops somewhere in the compiler.
> >
> > That's what I said for using SET_PREVAIL.  But his hack would 
> > specifically not replace anything in TREE_CHAIN (and hence 
> > BLOCK_VARS), and indeed not check anything either.
> 
> Hm, but we shouldn't end up streaming any BLOCKs at this point (nor 
> local TYPE_DECLs).  Those are supposed to be in the local function 
> sections only where no fixup for prevailing decls happens.

That's true, something is fishy with the patch, will try to investigate.


Ciao,
Michael.


Remove obsolete compatibility notes in vec.h

2012-09-12 Thread Diego Novillo
Committed to trunk.


* vec.h: Remove compatibility notes for previous distinction
between vectors of objects and vectors of pointers.

diff --git a/gcc/vec.h b/gcc/vec.h
index 88891d7..8858f6a 100644
--- a/gcc/vec.h
+++ b/gcc/vec.h
@@ -31,23 +31,6 @@ along with GCC; see the file COPYING3.  If not see
sometimes backed by out-of-line generic functions.  The vectors are
designed to interoperate with the GTY machinery.
 
-   FIXME - Remove the following compatibility notes after a handler
-   class for vec_t is implemented.
-
-   To preserve compatibility with the existing API, some functions
-   that manipulate vector elements implement two overloads: one taking
-   a pointer to the element and others that take a pointer to a
-   pointer to the element.
-
-   This used to be implemented with three different families of macros
-   and structures: structure objects, scalar objects and of pointers.
-   Both the structure object and pointer variants passed pointers to
-   objects around -- in the former case the pointers were stored into
-   the vector and in the latter case the pointers were dereferenced and
-   the objects copied into the vector.  The scalar object variant was
-   suitable for int-like objects, and the vector elements were returned
-   by value.
-
There are both 'index' and 'iterate' accessors.  The index accessor
is implemented by operator[].  The iterator returns a boolean
iteration condition and updates the iteration variable passed by
@@ -124,7 +107,6 @@ along with GCC; see the file COPYING3.  If not see
VEC_safe_push(tree,gc,s->v,decl); // append some decl onto the end
for (ix = 0; VEC_iterate(tree,s->v,ix,elt); ix++)
  { do something with elt }
-
 */
 
 #if ENABLE_CHECKING


[PATCH] Fix PR54489 - FRE needing AVAIL_OUT

2012-09-12 Thread Richard Guenther

This removes the need for FRE to compute AVAIL_OUT which can
consume an unreasonable amount of memory for testcases like

int foo (int a)
{
  int b = 0;
#define X if (a) b = b + 1;
#define XX X X X X X X X X X X
#define XXX XX XX XX XX XX XX XX XX XX XX
#define  XXX XXX XXX XXX XXX XXX XXX XXX XXX XXX
#define X          
#define XX X X X X X X X X X X
#define XXX XX XX XX XX XX XX XX XX 
XX
XX
  XX
  return b;
}

instead of computing AVAIL_OUT up-front for all basic-block just
to find the SSA name whose definition dominates the current
elimination point this re-organizes elimination to perform a domwalk,
updating a SCCVN value-id (thus, SSA name) -> leader (dominating SSA
name) mapping.

It also simplifies PRE by removing that in_fre global flag and
not sharing a common execute function.  FRE and PRE now only share
elimination (and value-numbering).

It comes to my mind that if we manage to get rid of the AVAIL_OUT
use from clean () and dependent_clean () we can delay PRE insertion
(well, producing and inserting actual GIMPLE stmts) to elimination
time as well, removing its need for AVAIL_OUT.  But that's surely
for a followup (and I bet sth else than PRE blows up at -O2 as well).

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2012-09-12  Richard Guenther  

PR tree-optimization/54489
* tree-ssa-pre.c: Include domwalk.h.
(in_fre): Remove.
(sccvn_valnum_from_value_id): New function.
(debug_bitmap_sets_for): Simplify.
(get_representative_for): Properly initialize the SCCVN valnum.
(create_expression_by_pieces): Likewise.
(insert_into_preds_of_block): Likewise.
(can_PRE_operation): Remove.
(make_values_for_phi): Simplify.
(compute_avail): Likewise.
(do_SCCVN_insertion): Remove.
(eliminate_avail, eliminate_push_avail, eliminate_insert):
New functions.
(eliminate): Split and perform a domwalk.
(eliminate_bb): Former eliminate part that is now dom-enter.
(eliminate_leave_block): New function.
(fini_eliminate): Likewise.
(init_pre): Simplify.
(fini_pre): Likewise.
(execute_pre): Fold into do_pre and do_fre.
(do_pre): Consume execute_pre.
(do_fre): Likewise.
* Makefile.in (tree-ssa-pre.o): Add domwalk.h dependency.

Index: gcc/tree-ssa-pre.c
===
--- gcc/tree-ssa-pre.c  (revision 191215)
+++ gcc/tree-ssa-pre.c  (working copy)
@@ -43,6 +43,7 @@ along with GCC; see the file COPYING3.
 #include "tree-scalar-evolution.h"
 #include "params.h"
 #include "dbgcnt.h"
+#include "domwalk.h"
 
 /* TODO:
 
@@ -351,8 +352,6 @@ get_or_alloc_expr_for_name (tree name)
   return result;
 }
 
-static bool in_fre = false;
-
 /* An unordered bitmap set.  One bitmap tracks values, the other,
expressions.  */
 typedef struct bitmap_set
@@ -637,6 +636,25 @@ get_expr_value_id (pre_expr expr)
 }
 }
 
+/* Return a SCCVN valnum (SSA name or constant) for the PRE value-id VAL.  */
+
+static tree
+sccvn_valnum_from_value_id (unsigned int val)
+{
+  bitmap_iterator bi;
+  unsigned int i;
+  bitmap exprset = VEC_index (bitmap, value_expressions, val);
+  EXECUTE_IF_SET_IN_BITMAP (exprset, 0, i, bi)
+{
+  pre_expr vexpr = expression_for_id (i);
+  if (vexpr->kind == NAME)
+   return VN_INFO (PRE_EXPR_NAME (vexpr))->valnum;
+  else if (vexpr->kind == CONSTANT)
+   return PRE_EXPR_CONSTANT (vexpr);
+}
+  return NULL_TREE;
+}
+
 /* Remove an expression EXPR from a bitmapped set.  */
 
 static void
@@ -1022,16 +1040,13 @@ DEBUG_FUNCTION void
 debug_bitmap_sets_for (basic_block bb)
 {
   print_bitmap_set (stderr, AVAIL_OUT (bb), "avail_out", bb->index);
-  if (!in_fre)
-{
-  print_bitmap_set (stderr, EXP_GEN (bb), "exp_gen", bb->index);
-  print_bitmap_set (stderr, PHI_GEN (bb), "phi_gen", bb->index);
-  print_bitmap_set (stderr, TMP_GEN (bb), "tmp_gen", bb->index);
-  print_bitmap_set (stderr, ANTIC_IN (bb), "antic_in", bb->index);
-  if (do_partial_partial)
-   print_bitmap_set (stderr, PA_IN (bb), "pa_in", bb->index);
-  print_bitmap_set (stderr, NEW_SETS (bb), "new_sets", bb->index);
-}
+  print_bitmap_set (stderr, EXP_GEN (bb), "exp_gen", bb->index);
+  print_bitmap_set (stderr, PHI_GEN (bb), "phi_gen", bb->index);
+  print_bitmap_set (stderr, TMP_GEN (bb), "tmp_gen", bb->index);
+  print_bitmap_set (stderr, ANTIC_IN (bb), "antic_in", bb->index);
+  if (do_partial_partial)
+print_bitmap_set (stderr, PA_IN (bb), "pa_in", bb->index);
+  print_bitmap_set (stderr, NEW_SETS (bb), "new_sets", bb->index);
 }
 
 /* Print out the expressions that have VAL to OUTFILE.  */
@@ -1402,11 +1417,9 @@ get_representative_for (const pre_expr e
  that we will return.  */

Re: [PATCH] Combine location with block using block_locations

2012-09-12 Thread Michael Matz
Hi,

On Wed, 12 Sep 2012, Michael Matz wrote:

> > Hm, but we shouldn't end up streaming any BLOCKs at this point (nor 
> > local TYPE_DECLs).  Those are supposed to be in the local function 
> > sections only where no fixup for prevailing decls happens.
> 
> That's true, something is fishy with the patch, will try to investigate.

ipa-prop creates the problem.  Its tree mapping can contain expressions, 
expressions can have locations, locations now have blocks.  The tree maps 
are stored as part of jump functions, and hence as part of node summaries.  
Node summaries are global, hence blocks, and therefore block vars can be 
placed in the global blob.

That's not supposed to happen.  The patch below fixes this instance of the 
problem and makes the testcase work with Dehaos patch with the 
LTO_NO_PREVAIL call added back in.


Ciao,
Michael.

Index: lto-cgraph.c
===
--- lto-cgraph.c(revision 190803)
+++ lto-cgraph.c(working copy)
@@ -1373,6 +1373,7 @@ output_node_opt_summary (struct output_b
  mechanism to store function local declarations into summaries.  */
   gcc_assert (parm);
   streamer_write_uhwi (ob, parm_num);
+  gcc_assert (IS_UNKNOWN_LOCATION (EXPR_LOCATION (map->new_tree)));
   stream_write_tree (ob, map->new_tree, true);
   bp = bitpack_create (ob->main_stream);
   bp_pack_value (&bp, map->replace_p, 1);
Index: ipa-prop.c
===
--- ipa-prop.c  (revision 190803)
+++ ipa-prop.c  (working copy)
@@ -1378,7 +1378,11 @@ ipa_compute_jump_functions_for_edge (str
   tree arg = gimple_call_arg (call, n);
 
   if (is_gimple_ip_invariant (arg))
-   ipa_set_jf_constant (jfunc, arg);
+   {
+ arg = unshare_expr (arg);
+ SET_EXPR_LOCATION (arg, UNKNOWN_LOCATION);
+ ipa_set_jf_constant (jfunc, arg);
+   }
   else if (!is_gimple_reg_type (TREE_TYPE (arg))
   && TREE_CODE (arg) == PARM_DECL)
{
@@ -3154,6 +3158,7 @@ ipa_write_jump_function (struct output_b
   stream_write_tree (ob, jump_func->value.known_type.component_type, true);
   break;
 case IPA_JF_CONST:
+  gcc_assert (IS_UNKNOWN_LOCATION (EXPR_LOCATION 
(jump_func->value.constant)));
   stream_write_tree (ob, jump_func->value.constant, true);
   break;
 case IPA_JF_PASS_THROUGH:


Re: [PATCH] Combine location with block using block_locations

2012-09-12 Thread Martin Jambor
Hi,

On Wed, Sep 12, 2012 at 04:17:45PM +0200, Michael Matz wrote:
> Hi,
> 
> On Wed, 12 Sep 2012, Michael Matz wrote:
> 
> > > Hm, but we shouldn't end up streaming any BLOCKs at this point (nor 
> > > local TYPE_DECLs).  Those are supposed to be in the local function 
> > > sections only where no fixup for prevailing decls happens.
> > 
> > That's true, something is fishy with the patch, will try to investigate.
> 
> ipa-prop creates the problem.  Its tree mapping can contain expressions, 
> expressions can have locations, locations now have blocks.  The tree maps 
> are stored as part of jump functions, and hence as part of node summaries.  
> Node summaries are global, hence blocks, and therefore block vars can be 
> placed in the global blob.
> 
> That's not supposed to happen.  The patch below fixes this instance of the 
> problem and makes the testcase work with Dehaos patch with the 
> LTO_NO_PREVAIL call added back in.
> 
> 
> Ciao,
> Michael.
> 
> Index: lto-cgraph.c
> ===
> --- lto-cgraph.c  (revision 190803)
> +++ lto-cgraph.c  (working copy)
> @@ -1373,6 +1373,7 @@ output_node_opt_summary (struct output_b
>   mechanism to store function local declarations into summaries.  */
>gcc_assert (parm);
>streamer_write_uhwi (ob, parm_num);
> +  gcc_assert (IS_UNKNOWN_LOCATION (EXPR_LOCATION (map->new_tree)));
>stream_write_tree (ob, map->new_tree, true);
>bp = bitpack_create (ob->main_stream);
>bp_pack_value (&bp, map->replace_p, 1);
> Index: ipa-prop.c
> ===
> --- ipa-prop.c(revision 190803)
> +++ ipa-prop.c(working copy)
> @@ -1378,7 +1378,11 @@ ipa_compute_jump_functions_for_edge (str
>tree arg = gimple_call_arg (call, n);
>  
>if (is_gimple_ip_invariant (arg))
> - ipa_set_jf_constant (jfunc, arg);
> + {
> +   arg = unshare_expr (arg);
> +   SET_EXPR_LOCATION (arg, UNKNOWN_LOCATION);
> +   ipa_set_jf_constant (jfunc, arg);
> + }
>else if (!is_gimple_reg_type (TREE_TYPE (arg))
>  && TREE_CODE (arg) == PARM_DECL)

Perhaps it would be better if ipa_set_jf_constant did that, just in
case we ever add another caller?  Note that arithmetic functions also
have their second operand tree stored in them and so perhaps
ipa_set_jf_arith_pass_through should do the same.

And I it is also necessary to do the same thing at the end of
determine_known_aggregate_parts, i.e. before assignment to
item->value.  I can post a separate patch if necessary.

I wasn't following this thread but I hope that streaming types does
not cause this problem.  If they do, there are quite a few in various
jump functions and indirect call graph edges.

Thanks,

Martin


>   {
> @@ -3154,6 +3158,7 @@ ipa_write_jump_function (struct output_b
>stream_write_tree (ob, jump_func->value.known_type.component_type, 
> true);
>break;
>  case IPA_JF_CONST:
> +  gcc_assert (IS_UNKNOWN_LOCATION (EXPR_LOCATION 
> (jump_func->value.constant)));
>stream_write_tree (ob, jump_func->value.constant, true);
>break;
>  case IPA_JF_PASS_THROUGH:


Re: Backtrace library [1/3]

2012-09-12 Thread Ian Lance Taylor
On Tue, Sep 11, 2012 at 4:36 PM, Lawrence Crowl  wrote:
> On 9/11/12, Ian Lance Taylor  wrote:
>> This patch is the interface to and configury of libbacktrace.
>> I've separated these out as the parts of libbacktrace that
>> require the most review.  The interface to libbacktrace is in
>> the file backtrace.h.  This is what callers will use.  The file
>> backtrace-supported.h is also available so that programs can see
>> whether calling the backtrace library will work at all.
>
> The interface relies on global data in the library.  Wouldn't it
> be better to expose the state as an additional parameter to enable
> concurrent access by different threads?  That parameter could then
> be modeled as 'this' parameter, addressing Gaby's suggesting.

I went ahead and added a state parameter to the interface.  I've
attached the updated patch.

Ian


foo1.patch
Description: Binary data


Re: [PATCH] Combine location with block using block_locations

2012-09-12 Thread Richard Guenther
On Wed, Sep 12, 2012 at 4:37 PM, Martin Jambor  wrote:
> Hi,
>
> On Wed, Sep 12, 2012 at 04:17:45PM +0200, Michael Matz wrote:
>> Hi,
>>
>> On Wed, 12 Sep 2012, Michael Matz wrote:
>>
>> > > Hm, but we shouldn't end up streaming any BLOCKs at this point (nor
>> > > local TYPE_DECLs).  Those are supposed to be in the local function
>> > > sections only where no fixup for prevailing decls happens.
>> >
>> > That's true, something is fishy with the patch, will try to investigate.
>>
>> ipa-prop creates the problem.  Its tree mapping can contain expressions,
>> expressions can have locations, locations now have blocks.  The tree maps
>> are stored as part of jump functions, and hence as part of node summaries.
>> Node summaries are global, hence blocks, and therefore block vars can be
>> placed in the global blob.
>>
>> That's not supposed to happen.  The patch below fixes this instance of the
>> problem and makes the testcase work with Dehaos patch with the
>> LTO_NO_PREVAIL call added back in.
>>
>>
>> Ciao,
>> Michael.
>> 
>> Index: lto-cgraph.c
>> ===
>> --- lto-cgraph.c  (revision 190803)
>> +++ lto-cgraph.c  (working copy)
>> @@ -1373,6 +1373,7 @@ output_node_opt_summary (struct output_b
>>   mechanism to store function local declarations into summaries.  */
>>gcc_assert (parm);
>>streamer_write_uhwi (ob, parm_num);
>> +  gcc_assert (IS_UNKNOWN_LOCATION (EXPR_LOCATION (map->new_tree)));
>>stream_write_tree (ob, map->new_tree, true);
>>bp = bitpack_create (ob->main_stream);
>>bp_pack_value (&bp, map->replace_p, 1);
>> Index: ipa-prop.c
>> ===
>> --- ipa-prop.c(revision 190803)
>> +++ ipa-prop.c(working copy)
>> @@ -1378,7 +1378,11 @@ ipa_compute_jump_functions_for_edge (str
>>tree arg = gimple_call_arg (call, n);
>>
>>if (is_gimple_ip_invariant (arg))
>> - ipa_set_jf_constant (jfunc, arg);
>> + {
>> +   arg = unshare_expr (arg);
>> +   SET_EXPR_LOCATION (arg, UNKNOWN_LOCATION);
>> +   ipa_set_jf_constant (jfunc, arg);
>> + }
>>else if (!is_gimple_reg_type (TREE_TYPE (arg))
>>  && TREE_CODE (arg) == PARM_DECL)
>
> Perhaps it would be better if ipa_set_jf_constant did that, just in
> case we ever add another caller?  Note that arithmetic functions also
> have their second operand tree stored in them and so perhaps
> ipa_set_jf_arith_pass_through should do the same.
>
> And I it is also necessary to do the same thing at the end of
> determine_known_aggregate_parts, i.e. before assignment to
> item->value.  I can post a separate patch if necessary.
>
> I wasn't following this thread but I hope that streaming types does
> not cause this problem.  If they do, there are quite a few in various
> jump functions and indirect call graph edges.

Well, or if jump functions would not use trees ...

Richard.

> Thanks,
>
> Martin
>
>
>>   {
>> @@ -3154,6 +3158,7 @@ ipa_write_jump_function (struct output_b
>>stream_write_tree (ob, jump_func->value.known_type.component_type, 
>> true);
>>break;
>>  case IPA_JF_CONST:
>> +  gcc_assert (IS_UNKNOWN_LOCATION (EXPR_LOCATION 
>> (jump_func->value.constant)));
>>stream_write_tree (ob, jump_func->value.constant, true);
>>break;
>>  case IPA_JF_PASS_THROUGH:


Re: Backtrace library [3/3]

2012-09-12 Thread Ian Lance Taylor
On Tue, Sep 11, 2012 at 10:16 PM, Jakub Jelinek  wrote:
> On Tue, Sep 11, 2012 at 03:55:04PM -0700, Ian Lance Taylor wrote:
>> +/* The DWARF abbreviations for a compilation unit.  This structure
>> +   only exists while reading the compilation unit.  Most DWARF readers
>> +   seem to a hash table to map abbrev ID's to abbrev entries.
>> +   However, we primarily care about GCC, and GCC simply issues ID's in
>> +   numerical order starting at 1.  So we simply keep a sorted vector,
>> +   and try to just look up the code.  */
>
> As soon as you run dwz on it, the abbrevs won't be in numerical order,
> and even GCC really should be changed to sort the abbrevs by decreasing
> use count if there are more than 127 abbrevs (special casing the base types
> that are used for typed DWARF stack).

OK.  Still, the current code should work correctly.

> The patch also doesn't handle DW_TAG_imported_unit/DW_TAG_partial_unit,
> which is a show stopper if dwz is used (and
> DW_FORM_GNU_strp_alt/DW_FORM_GNU_ref_alt isn't handled properly).
>
> BTW, the library doesn't seem to implement what gdb does, in particular
> improved support for tail calls.  In gdb that was (at least);
> 2011-10-09  Jan Kratochvil  
>
> Protect entry values against self tail calls.
> ...
>
> 2011-10-09  Jan Kratochvil  
>
> Recognize virtual tail call frames.


Yes.  I think these are issues that can be addressed after the initial
version of the library is approved and committed.

Ian


Re: [PATCH] Fix PR54489 - FRE needing AVAIL_OUT

2012-09-12 Thread Steven Bosscher
On Wed, Sep 12, 2012 at 4:02 PM, Richard Guenther wrote:
> for a followup (and I bet sth else than PRE blows up at -O2 as well).

Actually, the only thing that really blows up is that enemy of scalability, VRP.

With -O2 -fno-tree-{pre,fre,vrp}, the slowest part of the compiler on
this test case are dominance computation, dominator-based
optimization, and tree CFG cleanup, but they all scale linearly with
the number of X'es in the test case :-)

Ciao!
Steven


Re: Backtrace library [3/3]

2012-09-12 Thread Ian Lance Taylor
On Wed, Sep 12, 2012 at 2:01 AM, Florian Weimer  wrote:
>
> +backtrace_open (const char *filename, backtrace_error_callback
> error_callback,
> +   void *data)
> +{
> +  int descriptor;
> +
> +  descriptor = open (filename, O_RDONLY | O_CLOEXEC);
> +  if (descriptor < 0)
> +{
> +  error_callback (data, filename, errno);
> +  return -1;
> +}
> +  if (O_CLOEXEC == 0)
> +{
> +  /* It doesn't matter if this fails for some reason.  */
> +  fcntl (descriptor, F_SETFD, FD_CLOEXEC);
> +}
>
> You should call fcntl unconditionally.  O_CLOEXEC might be non-zero during
> build, but could still be ignored by the kernel.

OK, done.

> +static void
> +fileline_initialize (backtrace_error_callback error_callback, void *data)
> +{
> ...
> +  if (executable_name != NULL)
> +descriptor = backtrace_open (executable_name, error_callback, data);
> +  else
> +descriptor = backtrace_open ("/proc/self/exe", error_callback, data);
>
> You should try getauxval(AT_EXECFN) as well (needs recent glibc), so that
> this works with a mounted /proc.

I'm going to postpone this--my glibc doesn't have this function.

> This library should only be used when getauxval(AT_SECURE) is zero, so that
> the program doesn't try to read files with elevated privileges to which the
> original user wouldn't have access.  I don't think this has to be addressed
> within the library itself.

The library doesn't try to elevate privileges, so, yes, I think this
is entirely the responsibility of the program calling the library.

> Adding /usr/lib/debug support shouldn't be too hard, I will try to figure
> out the required path transformations (which are somewhat system-specific).

Thanks!

Ian


Re: [PATCH] Combine location with block using block_locations

2012-09-12 Thread Martin Jambor
Hi,

On Wed, Sep 12, 2012 at 04:47:11PM +0200, Richard Guenther wrote:
> On Wed, Sep 12, 2012 at 4:37 PM, Martin Jambor  wrote:
> > On Wed, Sep 12, 2012 at 04:17:45PM +0200, Michael Matz wrote:
> >> On Wed, 12 Sep 2012, Michael Matz wrote:
> >>
> >> > > Hm, but we shouldn't end up streaming any BLOCKs at this point (nor
> >> > > local TYPE_DECLs).  Those are supposed to be in the local function
> >> > > sections only where no fixup for prevailing decls happens.
> >> >
> >> > That's true, something is fishy with the patch, will try to investigate.
> >>
> >> ipa-prop creates the problem.  Its tree mapping can contain expressions,
> >> expressions can have locations, locations now have blocks.  The tree maps
> >> are stored as part of jump functions, and hence as part of node summaries.
> >> Node summaries are global, hence blocks, and therefore block vars can be
> >> placed in the global blob.
> >>
> >> That's not supposed to happen.  The patch below fixes this instance of the
> >> problem and makes the testcase work with Dehaos patch with the
> >> LTO_NO_PREVAIL call added back in.
> >>
> >>
> >> Ciao,
> >> Michael.
> >> 
> >> Index: lto-cgraph.c
> >> ===
> >> --- lto-cgraph.c  (revision 190803)
> >> +++ lto-cgraph.c  (working copy)
> >> @@ -1373,6 +1373,7 @@ output_node_opt_summary (struct output_b
> >>   mechanism to store function local declarations into summaries.  
> >> */
> >>gcc_assert (parm);
> >>streamer_write_uhwi (ob, parm_num);
> >> +  gcc_assert (IS_UNKNOWN_LOCATION (EXPR_LOCATION (map->new_tree)));
> >>stream_write_tree (ob, map->new_tree, true);
> >>bp = bitpack_create (ob->main_stream);
> >>bp_pack_value (&bp, map->replace_p, 1);
> >> Index: ipa-prop.c
> >> ===
> >> --- ipa-prop.c(revision 190803)
> >> +++ ipa-prop.c(working copy)
> >> @@ -1378,7 +1378,11 @@ ipa_compute_jump_functions_for_edge (str
> >>tree arg = gimple_call_arg (call, n);
> >>
> >>if (is_gimple_ip_invariant (arg))
> >> - ipa_set_jf_constant (jfunc, arg);
> >> + {
> >> +   arg = unshare_expr (arg);
> >> +   SET_EXPR_LOCATION (arg, UNKNOWN_LOCATION);
> >> +   ipa_set_jf_constant (jfunc, arg);
> >> + }
> >>else if (!is_gimple_reg_type (TREE_TYPE (arg))
> >>  && TREE_CODE (arg) == PARM_DECL)
> >
> > Perhaps it would be better if ipa_set_jf_constant did that, just in
> > case we ever add another caller?  Note that arithmetic functions also
> > have their second operand tree stored in them and so perhaps
> > ipa_set_jf_arith_pass_through should do the same.
> >
> > And I it is also necessary to do the same thing at the end of
> > determine_known_aggregate_parts, i.e. before assignment to
> > item->value.  I can post a separate patch if necessary.
> >
> > I wasn't following this thread but I hope that streaming types does
> > not cause this problem.  If they do, there are quite a few in various
> > jump functions and indirect call graph edges.
> 
> Well, or if jump functions would not use trees ...
> 

Eventually, yes, but it is not a short-term goal.  At least I'd like
to replace the type-based devirtualization with pointer tracking
first, which will avoid the need to store (hopefully) all types and I
will have a better idea how to represent the jump functions in a
different way.  I will still need access to VMT initializers to keep
indirect inlining, indirect call inlining hints and IPA-CP
devirtualization working, though.

But as I said, this is all a long-term plan.

Martin



Finish up PR rtl-optimization/44194

2012-09-12 Thread Eric Botcazou
This is the PR about the useless spilling to memory of structures that are 
returned in registers.  It was essentially addressed last year by Easwaran with 
an enhancement of the RTL DSE pass, but Easwaran also noted that we still spill 
to memory in the simplest cases, e.g. gcc.dg/pr44194-1.c, because expand_call
creates a temporary on the stack to store the value returned in registers...

The attached patch solves this problem by copying the value into pseudos 
instead by means of emit_group_move_into_temps.  This is sufficient to get rid 
of the remaining memory accesses for gcc.dg/pr44194-1.c on x86-64 for example,
but not on strict-alignment platforms like SPARC64.

The problem is that, on strict-alignment platforms, emit_group_store will use 
bitfield techniques (store_bit_field) to store the returned value, and the 
bitfield routines (store_bit_field and extract_bit_field) have these lines:

  /* We may be accessing data outside the field, which means
 we can alias adjacent data.  */
  if (MEM_P (op0))
{
  op0 = shallow_copy_rtx (op0);
  set_mem_alias_set (op0, 0);
  set_mem_expr (op0, 0);
}

Now the enhancement implemented in the RTL DSE pass by Easwaran is precisely 
based on the MEM_EXPR of MEM objects.

The patch solves this problem by implementing a variant of adjust_address along 
the lines of the comment at the end of adjust_address_1:

  /* At some point, we should validate that this offset is within the object,
 if all the appropriate values are known.  */
  return new_rtx;

i.e. adjust_bitfield_address will drop the underlying object of the MEM if it 
cannot prove that the adjusted memory access is still within its bounds.
The bitfield manipulation routines in expmed.c are then changed to invoke 
adjust_bitfield_address instead of adjust_address and the above special lines 
in store_bit_field and extract_bit_field are eliminated.

While I was at it, I also fixed a probable oversight in extract_bit_field_1 
that has bothered me for a while: in the multi-word case, extract_bit_field_1 
recurses on extract_bit_field instead of itself (unlike store_bit_field_1), 
which short-circuits the FALLBACK_P parameter.

Tested on x86-64/Linux and SPARC64/Solaris.  Comments?


2012-09-12  Eric Botcazou  

PR rtl-optimization/44194
* calls.c (expand_call): In the PARALLEL case, copy the return value
into pseudos instead of spilling it onto the stack.
* emit-rtl.c (adjust_address_1): Rename ADJUST into ADJUST_ADDRESS and
add new ADJUST_OBJECT parameter.
If ADJUST_OBJECT is set, drop the underlying object if it cannot be
proved that the adjusted memory access is still within its bounds.
(adjust_automodify_address_1): Adjust call to adjust_address_1.
(widen_memory_access): Likewise.
* expmed.c (store_bit_field_1): Call adjust_bitfield_address instead
of adjust_address.  Do not drop the underlying object of a MEM.
(store_fixed_bit_field): Likewise.
(extract_bit_field_1): Likewise.  Fix oversight in recursion.
(extract_fixed_bit_field): Likewise.
* expr.h (adjust_address_1): Adjust prototype.
(adjust_address): Adjust call to adjust_address_1.
(adjust_address_nv): Likewise.
(adjust_bitfield_address): New macro.
(adjust_bitfield_address_nv): Likewise.
* expr.c (expand_assignment): Handle a PARALLEL in more cases.
(store_expr): Likewise.
(store_field): Likewise.

* dse.c: Fix typos in the head comment.


-- 
Eric Botcazou
Index: expr.c
===
--- expr.c	(revision 191198)
+++ expr.c	(working copy)
@@ -4870,8 +4870,16 @@ expand_assignment (tree to, tree from, b
   /* Handle calls that return values in multiple non-contiguous locations.
 	 The Irix 6 ABI has examples of this.  */
   if (GET_CODE (to_rtx) == PARALLEL)
-	emit_group_load (to_rtx, value, TREE_TYPE (from),
-			 int_size_in_bytes (TREE_TYPE (from)));
+	{
+	  if (GET_CODE (value) == PARALLEL)
+	emit_group_move (to_rtx, value);
+	  else
+	emit_group_load (to_rtx, value, TREE_TYPE (from),
+			 int_size_in_bytes (TREE_TYPE (from)));
+	}
+  else if (GET_CODE (value) == PARALLEL)
+	emit_group_store (to_rtx, value, TREE_TYPE (from),
+			  int_size_in_bytes (TREE_TYPE (from)));
   else if (GET_MODE (to_rtx) == BLKmode)
 	emit_block_move (to_rtx, value, expr_size (from), BLOCK_OP_NORMAL);
   else
@@ -4903,9 +4911,16 @@ expand_assignment (tree to, tree from, b
   else
 	temp = expand_expr (from, NULL_RTX, GET_MODE (to_rtx), EXPAND_NORMAL);
 
+  /* Handle calls that return values in multiple non-contiguous locations.
+	 The Irix 6 ABI has examples of this.  */
   if (GET_CODE (to_rtx) == PARALLEL)
-	emit_group_load (to_rtx, temp, TREE_TYPE (from),
-			 int_size_in_bytes (TREE_TYPE (from)));
+	{
+	  if (GET_CODE (temp) == PARALLEL)
+	emit_group_move (t

Re: [PATCH,i386] Enable prefetchw in processor alias table for AMD targets

2012-09-12 Thread Uros Bizjak
On Tue, Sep 11, 2012 at 11:03 AM,   wrote:
> Hi Maintainers,
>
> This patch enables "prefetchw" ISA in the processor alias table for targets 
> amdfam10,barcelona and bdver1,2 and btver1,2.
>
> GCC regression test passes with the patch.
>
> Ok for trunk?
>
> Change log:
>
> 2012-09-11  Venkataramanan Kumar  
>
> * config/i386/i386.c (processor_alias_table): Enable PTA_PRFCHW
> for targets amdfam10, barcelona, bdver1, bdver2, btver1 and btver2.

Please note that amdfam10 and barcelona are already generating
prefetchw due to PTA_3DNOW flag, so these targets can be removed from
the patch.

The patch is OK for mainline with that change. Please commit the patch.

Thanks,
Uros.


[AArch64] fix missing Dwarf call frame information in the epilogue

2012-09-12 Thread Yufeng Zhang
This patch fixes a bug in the AArch64 epilogue expander which failed to 
generate consistent information of REG_FRAME_RELATED_EXPR reg notes in 
some cases; for instance, given the following C source code:


int bar (unsigned int);

int foo (void)
{
   return bar (0xcafe);
}
--- CUT ---

The -O0 -g code gen for the function 'foo' is:

 .cfi_startproc
 stp x29, x30, [sp, -16]!
.LCFI0:
 .cfi_def_cfa_offset 16
 .cfi_offset 29, -16
 .cfi_offset 30, -8
 add x29, sp, 0
.LCFI1:
 .cfi_def_cfa_register 29
.LM2:
 mov w0, 57005
 bl  bar
.LM3:
 ldp x29, x30, [sp], 16
 .cfi_restore 30
 .cfi_restore 29
 ret
 .cfi_endproc
--- CUT ---

As can be seen, the CFA reg is X29 from the ADD instruction until the
end of the func; this is wrong as the LDP instruction restores X29;
the CFA reg should be set to SP after LDP.

The patch passes the gcc and g++ testsuites.  Is it OK for the aarch64 
branch?



Thanks,
Yufeng


gcc/ChangeLog

2012-09-12  Yufeng Zhang  

* config/aarch64/aarch64.c (aarch64_expand_prologue): Add new local
variable 'cfa_reg' and replace stack_pointer_rtx with it in the
REG_FRAME_RELATED_EXPR reg note for the load-pair with writeback
instruction.

gcc/testsuite/ChangeLog

2012-09-12  Yufeng Zhang  

* gcc.target/aarch64/dwarf-cfa-reg.c: New file.diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 2d7eba7..f452d3d 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1786,7 +1786,7 @@ aarch64_expand_prologue (void)
 	   - original_frame_size
 	   - cfun->machine->frame.saved_regs_size);
 
-  /* Store pairs and load pairs have a range only of +/- 512.  */
+  /* Store pairs and load pairs have a range only -512 to 504.  */
   if (offset >= 512)
 {
   /* When the frame has a large size, an initial decrease is done on
@@ -1934,6 +1934,7 @@ aarch64_expand_epilogue (bool for_sibcall)
   HOST_WIDE_INT original_frame_size, frame_size, offset;
   HOST_WIDE_INT fp_offset;
   rtx insn;
+  rtx cfa_reg;
 
   aarch64_layout_frame ();
   original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
@@ -1946,7 +1947,9 @@ aarch64_expand_epilogue (bool for_sibcall)
 	   - original_frame_size
 	   - cfun->machine->frame.saved_regs_size);
 
-  /* Store pairs and load pairs have a range only of +/- 512.  */
+  cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
+
+  /* Store pairs and load pairs have a range only -512 to 504.  */
   if (offset >= 512)
 {
   offset = original_frame_size + cfun->machine->frame.saved_regs_size;
@@ -1977,6 +1980,10 @@ aarch64_expand_epilogue (bool for_sibcall)
    hard_frame_pointer_rtx,
    GEN_INT (- fp_offset)));
   RTX_FRAME_RELATED_P (insn) = 1;
+  /* As SP is set to (FP - fp_offset), according to the rules in
+	 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
+	 from the value of SP from now on.  */
+  cfa_reg = stack_pointer_rtx;
 }
 
   aarch64_save_or_restore_callee_save_registers
@@ -2016,10 +2023,15 @@ aarch64_expand_epilogue (bool for_sibcall)
  GEN_INT (offset),
  GEN_INT (GET_MODE_SIZE (DImode) + offset)));
 	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
+	  /* The REG_FRAME_RELATED_EXPR to be attached to the above insn
+		 is either SP = SP + offset or SP = FP + offset, depending on
+		 which register had been used to calculate the CFA before
+		 the insn; note that FP had the same value as SP before the
+		 above insn.  */
 	  aarch64_set_frame_expr (gen_rtx_SET
   (Pmode,
    stack_pointer_rtx,
-   gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+   gen_rtx_PLUS (Pmode, cfa_reg,
 		 GEN_INT (offset;
 	}
 
@@ -2040,7 +2052,6 @@ aarch64_expand_epilogue (bool for_sibcall)
 	  RTX_FRAME_RELATED_P (insn) = 1;
 	}
 	}
-
   else
 	{
 	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
diff --git a/gcc/testsuite/gcc.target/aarch64/dwarf-cfa-reg.c b/gcc/testsuite/gcc.target/aarch64/dwarf-cfa-reg.c
new file mode 100644
index 000..cce8815
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/dwarf-cfa-reg.c
@@ -0,0 +1,14 @@
+/* Verify that CFA register is restored to SP after FP is restored.  */
+/* { dg-do compile } */
+/* { dg-options "-O0 -gdwarf-2" } */
+/* { dg-final { scan-assembler ".cfi_restore 30" } } */
+/* { dg-final { scan-assembler ".cfi_restore 29" } } */
+/* { dg-final { scan-assembler ".cfi_def_cfa 31, 0" } } */
+/* { dg-final { scan-assembler "ret" } } */
+
+int bar (unsigned int);
+
+int foo (void)
+{
+  return bar (0xcafe);
+}

Re: Backtrace library [1/3]

2012-09-12 Thread Joseph S. Myers
On Tue, 11 Sep 2012, Ian Lance Taylor wrote:

> The configury is fairly standard.  Note that libbacktrace is built as
> both a host library (to link into the compilers) and as a target library
> (to link into libgo and possibly other libraries).

Under what circumstances will the library be built for the target - only 
if a relevant language such as Go is being built, or unconditionally?

If built unconditionally, will the library build OK for the target in 
situations where no target headers are yet available?  (For such builds of 
compilers used to bootstrap libc it would be usual to use various 
--disable- options to disable libraries not needed to build libc, and to 
use --enable-languages=c, but if this library is used on the host side by 
the compiler then the generic --disable-libbacktrace might not suffice 
since that would disable the host copy, required by the compiler itself, 
as well as the target copy.)

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: Backtrace library [1/3]

2012-09-12 Thread Toon Moene

On 09/12/2012 01:08 AM, Ian Lance Taylor wrote:


The interface is somewhat constrained in that, on systems that support
anonymous mmap, it does not call malloc.  That makes it possible to do
a symbolic backtrace from a signal handler.


It would also make it possible to have a traceback of a segmentation 
fault caused by corruption of the malloc arena ...


--
Toon Moene - e-mail: t...@moene.org - phone: +31 346 214290
Saturnushof 14, 3738 XG  Maartensdijk, The Netherlands
At home: http://moene.org/~toon/; weather: http://moene.org/~hirlam/
Progress of GNU Fortran: http://gcc.gnu.org/wiki/GFortran#news


LTO partitioning reorg 4/n

2012-09-12 Thread Jan Hubicka
Hi,
this patch unifies the code as promised in the previous patch. It also cleans
up the APIs.  We now have enum symbol_class specifying how the symbol should be
handled and add_symbol_to_partition handling all the magic of what needs to be
dragged into partition and what not.

I also commonized some code that was duplicated across different partitioners
and added "max" partitioner that puts every symbol into new partition if
possible.  

The patch also fixed undefined reference symbol seen with max partitioning on a 
testcase.
Here we fold through a reference to constant variable into a reference to 
constant pool.
Since initializer of the constant variable is not seen by the partitioner, we 
do not
duplicate the constant pool entry into the unit and we end up with undefined 
reference.
Fixed thus.  I will try to prepare fix for 4.7, too.  I think it is the problem
Andi is seeing with kernel build.

Bootstrapped/regtested x86_64-linux. Will commit it after bit of further 
testing.

* common.opt (flto-partition): Add "max".
* invoke.texi (flto-partition): Document "max"

* lto.c (do_whole_program_analysis): Care timevars, statistics and
AUX pointer cleaning. Add max partitioning.
* lto-partition.c (enum symbol_class): New.
(get_symbol_class): New function.
(symbol_partitioned_p): New function.
(add_references_to_partition): Remove.
(add_aliases_to_partition): Remove.
(add_cgraph_node_to_partition_1): Remove.
(add_cgraph_node_to_partition): Remove.
(add_symbol_to_partition): New function.
(add_symbol_to_partition_1): New function.
(contained_in_symbol): New function.
(partition_cgraph_node_p): Remove.
(partition_varpool_node_p): Remove.
(partition_symbol_p): Remove.
(lto_1_to_1_map): Cleanup.
(lto_max_map): New.
(lto_balanced_map): Update.
(lto_promote_cross_file_statics): Update.
* lto-partition.h (lto_max_map): Declare.
* timevar.def (TV_WHOPR_PARTITIONING): New timevar.
Index: common.opt
===
--- common.opt  (revision 191215)
+++ common.opt  (working copy)
@@ -1439,12 +1439,16 @@ Link-time optimization with number of pa
 
 flto-partition=1to1
 Common Var(flag_lto_partition_1to1)
-Partition functions and vars at linktime based on object files they originate 
from
+Partition symbols and vars at linktime based on object files they originate 
from
 
 flto-partition=balanced
 Common Var(flag_lto_partition_balanced)
 Partition functions and vars at linktime into approximately same sized buckets
 
+flto-partition=max
+Common Var(flag_lto_partition_max)
+Put every symbol into separate partition
+
 flto-partition=none
 Common Var(flag_lto_partition_none)
 Disable partioning and streaming
Index: lto/lto.c
===
--- lto/lto.c   (revision 191215)
+++ lto/lto.c   (working copy)
@@ -2604,11 +2604,28 @@ lto_wpa_write_files (void)
  
  fprintf (cgraph_dump_file, "Writing partition %s to file %s, %i 
insns\n",
   part->name, temp_filename, part->insns);
+ fprintf (cgraph_dump_file, "  Symbols in partition: ");
  for (lsei = lsei_start_in_partition (part->encoder); !lsei_end_p 
(lsei);
   lsei_next_in_partition (&lsei))
{
  symtab_node node = lsei_node (lsei);
- fprintf (cgraph_dump_file, "%s ", symtab_node_name (node));
+ fprintf (cgraph_dump_file, "%s ", symtab_node_asm_name (node));
+   }
+ fprintf (cgraph_dump_file, "\n  Symbols in boundary: ");
+ for (lsei = lsei_start (part->encoder); !lsei_end_p (lsei);
+  lsei_next (&lsei))
+   {
+ symtab_node node = lsei_node (lsei);
+ if (!lto_symtab_encoder_in_partition_p (part->encoder, node))
+   {
+ fprintf (cgraph_dump_file, "%s ", symtab_node_asm_name 
(node));
+ if (symtab_function_p (node)
+ && lto_symtab_encoder_encode_body_p (part->encoder, 
cgraph (node)))
+   fprintf (cgraph_dump_file, "(body included)");
+ else if (symtab_variable_p (node)
+  && lto_symtab_encoder_encode_initializer_p 
(part->encoder, varpool (node)))
+   fprintf (cgraph_dump_file, "(initializer included)");
+   }
}
  fprintf (cgraph_dump_file, "\n");
}
@@ -3093,6 +3107,8 @@ print_lto_report_1 (void)
 static void
 do_whole_program_analysis (void)
 {
+  symtab_node node;
+
   timevar_start (TV_PHASE_OPT_GEN);
 
   /* Note that since we are in WPA mode, materialize_cgraph will not
@@ -3127,17 +3143,31 @@ do_whole_program_analysis (void)
   dump_cgraph (cgraph_dump_file);
   dump_varpool (cgraph_dump_file);
 }
+#ifdef ENABLE_CHECKING
   verif

Re: [PATCH] Combine location with block using block_locations

2012-09-12 Thread Xinliang David Li
On Wed, Sep 12, 2012 at 2:13 AM, Richard Guenther
 wrote:
> On Wed, Sep 12, 2012 at 7:06 AM, Dehao Chen  wrote:
>> Now I think we are facing a more complex problem. The data structure
>> we use to store the location_adhoc_data are file-static in linemap.c
>> in libcpp. These data structures are not guarded by GTY(()).
>> Meanwhile, as we have removed the block data structure from
>> gimple.gsbase as well as tree.exp (encoding them into an location_t).
>> This could cause block being GCed and the LOCATION_BLOCK becoming
>> dangling pointers.
>
> Uh.  Note that it is quite important that we are able to garbage-collect 
> unused
> BLOCKs, this is the whole point of removing unused BLOCK scopes in
> remove_unused_locals.  So this indeed becomes much more complicated ...
> What would be desired is that the garbage collector can NULL an entry in
> the mapping table when it is not referenced in any other way (that other
> reference would be the BLOCK tree as stored in a FUNCTION_DECLs DECL_INITIAL).

It would be nice to GC those unused BLOCKS. I wonder how many BLOCKS
are created for a large C++ program. This patch saves memory by
shrinking tree size, is it a net win or loss without GC those BLOCKS?

thanks,

David


>
>> I tried to manipulate GTY to make it recognize the LOCATION_BLOCK from
>> gimple.gsbase.location. However, neigher nested_ptr nor mark_hook can
>> help me.
>>
>> Another approach would be guard the location_adhoc_data and related
>> data structures in GTY(()). However, this is non-trivial because tree
>> is not visible in libcpp. At the same time, my implementation heavily
>> relies on hashtable to make the code efficient, thus it's quite tricky
>> to make "param_is" and "use_params" work.
>>
>> The final approach, which I'll try tomorrow, would be move all my
>> implementation from libcpp to gcc, and guard them with GTY(()). I
>> still haven't thought of any potential problem of this approach. Any
>> comments?
>
> I think moving the mapping to GC in a lazy manner as I described above
> would be the way to go.  For hashtables GC already supports if_marked,
> not sure if similar support is available for arrays/vecs.
>
> Richard.
>
>> Thanks,
>> Dehao
>>
>> On Tue, Sep 11, 2012 at 9:00 AM, Dehao Chen  wrote:
>>> I saw comments in tree-streamer-out.c:
>>>
>>>   /* Do not stream BLOCK_SOURCE_LOCATION.  We cannot handle debug 
>>> information
>>>  for early inlining so drop it on the floor instead of ICEing in
>>>  dwarf2out.c.  */
>>>   streamer_write_chain (ob, BLOCK_VARS (expr), ref_p);
>>>
>>> However, what the code is doing seemed contradictory with the comment.
>>> Or am I missing something?
>>>
>>>
>>>
>>> On Tue, Sep 11, 2012 at 8:32 AM, Michael Matz  wrote:
 Hi,

 On Tue, 11 Sep 2012, Dehao Chen wrote:

> Looks like we have two choices:
>
> 1. Stream out block info, and use LTO_SET_PREVAIL for TREE_CHAIN(t)

 This will actually not work correctly in some cases.  The problem is, if
 the prevailing decl is already part of another chain (say in another
 block_var list) you would break the current chain.  Hence block vars need
 special handling in the lto streamer (another reason why tree_chain is not
 the most clever think to use for this chain).  This problem area needs to
 be solved somehow if block info is to be preserved correctly.

> 2. Don't stream out block info for LTO, and still call LTO_NO_PREVAIL
> (TREE_CHAIN (t)).

 That's also a large hammer as it basically will mean no debug info after
 LTO :-/ Sigh, at this point I have no good solution that doesn't involve
 quite some work, perhaps your hack is good enough for the time being,
 though I hate it :)
>>>
>>> I got it. Then I'll keep the patch as it is (remove the
>>> LTO_NO_PREVAIL), and work with Honza to resolve the issue he had, and
>>> then we should be good to check in?
>>>
>>> Thanks,
>>> Dehao
>>>


 Ciao,
 Michael.


[PATCH] proposed fix for PRs target/54516 & rtl-optimization/54540

2012-09-12 Thread Richard Earnshaw
Although the mersenne twister code is new, the build/runtime failures of
PRs target/54516 and PR rtl-optimization/54540 can be tracked back to
the addition of the variant

add %0("=rk"), %1("0"), %2("rk") // size=2

to the thumb2 backend.  This is causing reload to try to do clever
things when we have a large stack adjustment.

Prior to reload we have a sequence of the form:

r:SI = sp:SI + (-some_const1)
sp:SI = r:SI + (-some_const2)

Since there is no thumb2 instruction to directly perform the second
operation some reloading is needed.  Reload determines that the new
variant described above is a viable candidate and sets about trying to
create reloads for it.

The first thing it does is rearranges the operands to create

sp:SI = (-some_const2) + r:SI

which now needs precisely one reload.  The tie operation forces a reload
into sp of the value -some_const2, not realizing that putting random
values into SP is potentially very dangerous to the health of your
program (or system if you're running at a privileged level).

It seems to me that find_dummy_reload is doing an unsafe thing by
permitting the use of a register in fixed_regs[] as a reload register --
writing to fixed regs could have unspecified side effects on the
machine, so they should only be used in the ways that earlier passes
have deemed to be safe, and using them as a temporary is not one of those.

So this patch fixes the problem by forcing find_dummy_reload to reject
OUT as a valid reload register for IN when OUT overlaps a register
mentioned in fixed_regs[].

Bootstrapped on arm-linux-gnueabi with no regressions.  No new tests are
needed as this fixes the execution failures in the mersenne twister tests.


* reload.c (find_dummy_reload): Don't use OUT as a reload reg
for IN if it overlaps a fixed register.

OK?--- reload.c(revision 191208)
+++ reload.c(local)
@@ -2036,7 +2036,12 @@ find_dummy_reload (rtx real_in, rtx real
 However, we only ignore IN in its role as this reload.
 If the insn uses IN elsewhere and it contains OUT,
 that counts.  We can't be sure it's the "same" operand
-so it might not go through this reload.  */
+so it might not go through this reload.  
+
+ We also need to avoid using OUT if it, or part of it, is a
+ fixed register.  Modifying such registers, even transiently,
+ may have undefined effects on the machine, such as modifying
+ the stack pointer.  */
   saved_rtx = *inloc;
   *inloc = const0_rtx;
 
@@ -2049,7 +2054,8 @@ find_dummy_reload (rtx real_in, rtx real
 
  for (i = 0; i < nwords; i++)
if (! TEST_HARD_REG_BIT (reg_class_contents[(int) rclass],
-regno + i))
+regno + i)
+   || fixed_regs[regno + i])
  break;
 
  if (i == nwords)

Re: [Patch ARM] implement bswap16

2012-09-12 Thread Richard Earnshaw
On 11/09/12 16:04, Christophe Lyon wrote:
> On 11 September 2012 12:52, Richard Earnshaw  wrote:
>> Try something like:
>>
>> short foo(int);
>>
>> short swaps (short x, int y)
>> {
>>   int z = x;
>>   if (y)
>> z = __builtin_bswap16(x);
>>   return foo (z);
>> }
>>
>> If that's not enough, try adding 1 to z before calling foo.
>>
> 
> Thanks, it works.
> It's surprising however that 'return z' isn't enough.
> 
> Here is a new version of the patch, which also transforms the 32 bits
> arm_rev/thumb1_rev into arm_rev/arm_rev_cond.
> 
> I have enhanced the testcase too.
> 
> Christophe.=
> 
> 
> bswap16.patch
> 
> 

Thanks,

This is OK.

Ramana has just pointed out to me that predication these days can be set
on a per-alternative basis, so all those extra patterns for cond-exec
can be eliminated after all (sigh!).

I'm not going to insist that you fix that (unless you want to); We've
been round this patch too many times now and the code you've written
isn't wrong...

R.






Re: [PATCH] proposed fix for PRs target/54516 & rtl-optimization/54540

2012-09-12 Thread Ulrich Weigand
Richard Earnshaw wrote:

> It seems to me that find_dummy_reload is doing an unsafe thing by
> permitting the use of a register in fixed_regs[] as a reload register --
> writing to fixed regs could have unspecified side effects on the
> machine, so they should only be used in the ways that earlier passes
> have deemed to be safe, and using them as a temporary is not one of those.

This sounds reasonable, agreed.

>   * reload.c (find_dummy_reload): Don't use OUT as a reload reg
>   for IN if it overlaps a fixed register.

OK.

Thanks,
Ulrich

-- 
  Dr. Ulrich Weigand
  GNU Toolchain for Linux on System z and Cell BE
  ulrich.weig...@de.ibm.com



Re: [PATCH] Combine location with block using block_locations

2012-09-12 Thread Dehao Chen
There are two parts that needs memory management:

1. The BLOCK structure. This is managed by GC. I originally thought
that removing blocks from tree.gsbase would paralyze GC. This turned
out not to be a concern because DECL_INITIAL will still mark those
used tree nodes. This patch may decrease the memory consumption by
removing blocks from tree/gimple. However, as it makes more blocks
become used, they also increase the memory consumption.
2. The data structure in libcpp that maintains the hashtable for the
location->block mapping. This is relatively minor because for the
largest source I've seen, it only maintains less than 100K entries in
the array (less than 1M total memory consumption). However, as it is a
global data structure, it may make LTO unhappy. Honza is helping
testing the memory consumption on LTO (but we first need to make this
patch work for LTO). If the LTO result turns out ok, we probably don't
want to put these under GC because: 1. it'll make things much more
complicated. 2. using self managed memory is more efficient (as this
is frequently used in many passes). 3. not using GC actually saves
memory because even though the block is in the map, it can still be
GCed as soon as it's not reachable from DECL_INITIAL.

I've tested this on some very large C++ files (each one takes more
than 10s to build), the memory consumption does not see noticeable
increase/decrease.

Thanks,
Dehao

On Wed, Sep 12, 2012 at 9:39 AM, Xinliang David Li  wrote:
> On Wed, Sep 12, 2012 at 2:13 AM, Richard Guenther
>  wrote:
>> On Wed, Sep 12, 2012 at 7:06 AM, Dehao Chen  wrote:
>>> Now I think we are facing a more complex problem. The data structure
>>> we use to store the location_adhoc_data are file-static in linemap.c
>>> in libcpp. These data structures are not guarded by GTY(()).
>>> Meanwhile, as we have removed the block data structure from
>>> gimple.gsbase as well as tree.exp (encoding them into an location_t).
>>> This could cause block being GCed and the LOCATION_BLOCK becoming
>>> dangling pointers.
>>
>> Uh.  Note that it is quite important that we are able to garbage-collect 
>> unused
>> BLOCKs, this is the whole point of removing unused BLOCK scopes in
>> remove_unused_locals.  So this indeed becomes much more complicated ...
>> What would be desired is that the garbage collector can NULL an entry in
>> the mapping table when it is not referenced in any other way (that other
>> reference would be the BLOCK tree as stored in a FUNCTION_DECLs 
>> DECL_INITIAL).
>
> It would be nice to GC those unused BLOCKS. I wonder how many BLOCKS
> are created for a large C++ program. This patch saves memory by
> shrinking tree size, is it a net win or loss without GC those BLOCKS?
>
> thanks,
>
> David
>
>
>>
>>> I tried to manipulate GTY to make it recognize the LOCATION_BLOCK from
>>> gimple.gsbase.location. However, neigher nested_ptr nor mark_hook can
>>> help me.
>>>
>>> Another approach would be guard the location_adhoc_data and related
>>> data structures in GTY(()). However, this is non-trivial because tree
>>> is not visible in libcpp. At the same time, my implementation heavily
>>> relies on hashtable to make the code efficient, thus it's quite tricky
>>> to make "param_is" and "use_params" work.
>>>
>>> The final approach, which I'll try tomorrow, would be move all my
>>> implementation from libcpp to gcc, and guard them with GTY(()). I
>>> still haven't thought of any potential problem of this approach. Any
>>> comments?
>>
>> I think moving the mapping to GC in a lazy manner as I described above
>> would be the way to go.  For hashtables GC already supports if_marked,
>> not sure if similar support is available for arrays/vecs.
>>
>> Richard.
>>
>>> Thanks,
>>> Dehao
>>>
>>> On Tue, Sep 11, 2012 at 9:00 AM, Dehao Chen  wrote:
 I saw comments in tree-streamer-out.c:

   /* Do not stream BLOCK_SOURCE_LOCATION.  We cannot handle debug 
 information
  for early inlining so drop it on the floor instead of ICEing in
  dwarf2out.c.  */
   streamer_write_chain (ob, BLOCK_VARS (expr), ref_p);

 However, what the code is doing seemed contradictory with the comment.
 Or am I missing something?



 On Tue, Sep 11, 2012 at 8:32 AM, Michael Matz  wrote:
> Hi,
>
> On Tue, 11 Sep 2012, Dehao Chen wrote:
>
>> Looks like we have two choices:
>>
>> 1. Stream out block info, and use LTO_SET_PREVAIL for TREE_CHAIN(t)
>
> This will actually not work correctly in some cases.  The problem is, if
> the prevailing decl is already part of another chain (say in another
> block_var list) you would break the current chain.  Hence block vars need
> special handling in the lto streamer (another reason why tree_chain is not
> the most clever think to use for this chain).  This problem area needs to
> be solved somehow if block info is to be preserved correctly.
>
>> 2. Don't stream out block info for LTO, 

Re: PATCH: PR target/54445: TLS array lookup with negative constant is not combined into a single instruction

2012-09-12 Thread H.J. Lu
On Sun, Sep 2, 2012 at 7:20 AM, H.J. Lu  wrote:
> Hi,
>
> When x86-64 TLS support was added by:
>
> http://gcc.gnu.org/ml/gcc-patches/2002-10/msg01262.html
>
> it didn't allow negative offset.  Jakub, do you remember the reason for
> it?  I tested this patch on Linux/x86-64 and used the new GCC to build
> glibc for x86-64 and x32.  There are no regressions.  OK to install?
>
> Thanks.
>
>
> H.J.
> 
> gcc/
>
> 2012-09-02  H.J. Lu  
>
> PR target/54445
> * config/i386/predicates.md (x86_64_immediate_operand): Allow
> negative offset for UNSPEC_DTPOFF/UNSPEC_NTPOFF.
>
> gcc/testsuite/
> 2012-09-02  H.J. Lu  
>
> PR target/54445
> * gcc.target/i386/pr54445-1.c: New file.
> * gcc.target/i386/pr54445-2.c: Likewise.
>
> diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
> index 55e4b56..159594e 100644
> --- a/gcc/config/i386/predicates.md
> +++ b/gcc/config/i386/predicates.md
> @@ -228,8 +228,7 @@
> {
> case UNSPEC_DTPOFF:
> case UNSPEC_NTPOFF:
> - if (offset > 0
> - && trunc_int_for_mode (offset, SImode) == offset)
> + if (trunc_int_for_mode (offset, SImode) == offset)
> return true;
> }
>   break;
> diff --git a/gcc/testsuite/gcc.target/i386/pr54445-1.c 
> b/gcc/testsuite/gcc.target/i386/pr54445-1.c
> new file mode 100644
> index 000..72ef84e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr54445-1.c
> @@ -0,0 +1,24 @@
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +__thread unsigned char tls_array[64];
> +
> +unsigned char
> +__attribute__ ((noinline))
> +tls_array_lookup_with_negative_constant(long long int position) {
> +  return tls_array[position - 1];
> +}
> +
> +int
> +main ()
> +{
> +  int i;
> +
> +  for (i = 0; i < sizeof (tls_array) / sizeof (tls_array[0]); i++)
> +tls_array[i] = i;
> +
> +  for (i = 0; i < sizeof (tls_array) / sizeof (tls_array[0]); i++)
> +if (i != tls_array_lookup_with_negative_constant (i + 1))
> +  __builtin_abort ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/pr54445-2.c 
> b/gcc/testsuite/gcc.target/i386/pr54445-2.c
> new file mode 100644
> index 000..5151c13
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr54445-2.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile { target { *-*-linux* && { ! { ia32 } } } } } */
> +/* { dg-options "-O2 -fno-pic" } */
> +
> +__thread unsigned char tls_array[64];
> +
> +unsigned char
> +tls_array_lookup_with_negative_constant(long long int position) {
> +  return tls_array[position - 1];
> +}
> +
> +/* { dg-final { scan-assembler "mov(b|zbl)\[ 
> \t\](%fs:)?tls_array@tpoff-1\\(%" } } */

Hi Uros,

Is this OK?

Thanks.

-- 
H.J.


Re: [PATCH] Combine location with block using block_locations

2012-09-12 Thread Xinliang David Li
On Wed, Sep 12, 2012 at 10:05 AM, Dehao Chen  wrote:
> There are two parts that needs memory management:
>
> 1. The BLOCK structure. This is managed by GC. I originally thought
> that removing blocks from tree.gsbase would paralyze GC. This turned
> out not to be a concern because DECL_INITIAL will still mark those
> used tree nodes. This patch may decrease the memory consumption by
> removing blocks from tree/gimple. However, as it makes more blocks
> become used, they also increase the memory consumption.

You mean when you also make the location table GC root.

Can you share the mem-stats information for the large program with and
without your patch?

thanks,

David

> 2. The data structure in libcpp that maintains the hashtable for the
> location->block mapping. This is relatively minor because for the
> largest source I've seen, it only maintains less than 100K entries in
> the array (less than 1M total memory consumption). However, as it is a
> global data structure, it may make LTO unhappy. Honza is helping
> testing the memory consumption on LTO (but we first need to make this
> patch work for LTO). If the LTO result turns out ok, we probably don't
> want to put these under GC because: 1. it'll make things much more
> complicated. 2. using self managed memory is more efficient (as this
> is frequently used in many passes). 3. not using GC actually saves
> memory because even though the block is in the map, it can still be
> GCed as soon as it's not reachable from DECL_INITIAL.
>
> I've tested this on some very large C++ files (each one takes more
> than 10s to build), the memory consumption does not see noticeable
> increase/decrease.
>
> Thanks,
> Dehao
>
> On Wed, Sep 12, 2012 at 9:39 AM, Xinliang David Li  wrote:
>> On Wed, Sep 12, 2012 at 2:13 AM, Richard Guenther
>>  wrote:
>>> On Wed, Sep 12, 2012 at 7:06 AM, Dehao Chen  wrote:
 Now I think we are facing a more complex problem. The data structure
 we use to store the location_adhoc_data are file-static in linemap.c
 in libcpp. These data structures are not guarded by GTY(()).
 Meanwhile, as we have removed the block data structure from
 gimple.gsbase as well as tree.exp (encoding them into an location_t).
 This could cause block being GCed and the LOCATION_BLOCK becoming
 dangling pointers.
>>>
>>> Uh.  Note that it is quite important that we are able to garbage-collect 
>>> unused
>>> BLOCKs, this is the whole point of removing unused BLOCK scopes in
>>> remove_unused_locals.  So this indeed becomes much more complicated ...
>>> What would be desired is that the garbage collector can NULL an entry in
>>> the mapping table when it is not referenced in any other way (that other
>>> reference would be the BLOCK tree as stored in a FUNCTION_DECLs 
>>> DECL_INITIAL).
>>
>> It would be nice to GC those unused BLOCKS. I wonder how many BLOCKS
>> are created for a large C++ program. This patch saves memory by
>> shrinking tree size, is it a net win or loss without GC those BLOCKS?
>>
>> thanks,
>>
>> David
>>
>>
>>>
 I tried to manipulate GTY to make it recognize the LOCATION_BLOCK from
 gimple.gsbase.location. However, neigher nested_ptr nor mark_hook can
 help me.

 Another approach would be guard the location_adhoc_data and related
 data structures in GTY(()). However, this is non-trivial because tree
 is not visible in libcpp. At the same time, my implementation heavily
 relies on hashtable to make the code efficient, thus it's quite tricky
 to make "param_is" and "use_params" work.

 The final approach, which I'll try tomorrow, would be move all my
 implementation from libcpp to gcc, and guard them with GTY(()). I
 still haven't thought of any potential problem of this approach. Any
 comments?
>>>
>>> I think moving the mapping to GC in a lazy manner as I described above
>>> would be the way to go.  For hashtables GC already supports if_marked,
>>> not sure if similar support is available for arrays/vecs.
>>>
>>> Richard.
>>>
 Thanks,
 Dehao

 On Tue, Sep 11, 2012 at 9:00 AM, Dehao Chen  wrote:
> I saw comments in tree-streamer-out.c:
>
>   /* Do not stream BLOCK_SOURCE_LOCATION.  We cannot handle debug 
> information
>  for early inlining so drop it on the floor instead of ICEing in
>  dwarf2out.c.  */
>   streamer_write_chain (ob, BLOCK_VARS (expr), ref_p);
>
> However, what the code is doing seemed contradictory with the comment.
> Or am I missing something?
>
>
>
> On Tue, Sep 11, 2012 at 8:32 AM, Michael Matz  wrote:
>> Hi,
>>
>> On Tue, 11 Sep 2012, Dehao Chen wrote:
>>
>>> Looks like we have two choices:
>>>
>>> 1. Stream out block info, and use LTO_SET_PREVAIL for TREE_CHAIN(t)
>>
>> This will actually not work correctly in some cases.  The problem is, if
>> the prevailing decl is already part of another chain (say in another
>> blo

Re: Finish up PR rtl-optimization/44194

2012-09-12 Thread H.J. Lu
On Wed, Sep 12, 2012 at 8:37 AM, Eric Botcazou  wrote:
> This is the PR about the useless spilling to memory of structures that are
> returned in registers.  It was essentially addressed last year by Easwaran 
> with
> an enhancement of the RTL DSE pass, but Easwaran also noted that we still 
> spill
> to memory in the simplest cases, e.g. gcc.dg/pr44194-1.c, because expand_call
> creates a temporary on the stack to store the value returned in registers...
>
> The attached patch solves this problem by copying the value into pseudos
> instead by means of emit_group_move_into_temps.  This is sufficient to get rid
> of the remaining memory accesses for gcc.dg/pr44194-1.c on x86-64 for example,
> but not on strict-alignment platforms like SPARC64.
>
> The problem is that, on strict-alignment platforms, emit_group_store will use
> bitfield techniques (store_bit_field) to store the returned value, and the
> bitfield routines (store_bit_field and extract_bit_field) have these lines:
>
>   /* We may be accessing data outside the field, which means
>  we can alias adjacent data.  */
>   if (MEM_P (op0))
> {
>   op0 = shallow_copy_rtx (op0);
>   set_mem_alias_set (op0, 0);
>   set_mem_expr (op0, 0);
> }
>
> Now the enhancement implemented in the RTL DSE pass by Easwaran is precisely
> based on the MEM_EXPR of MEM objects.
>
> The patch solves this problem by implementing a variant of adjust_address 
> along
> the lines of the comment at the end of adjust_address_1:
>
>   /* At some point, we should validate that this offset is within the object,
>  if all the appropriate values are known.  */
>   return new_rtx;
>
> i.e. adjust_bitfield_address will drop the underlying object of the MEM if it
> cannot prove that the adjusted memory access is still within its bounds.
> The bitfield manipulation routines in expmed.c are then changed to invoke
> adjust_bitfield_address instead of adjust_address and the above special lines
> in store_bit_field and extract_bit_field are eliminated.
>
> While I was at it, I also fixed a probable oversight in extract_bit_field_1
> that has bothered me for a while: in the multi-word case, extract_bit_field_1
> recurses on extract_bit_field instead of itself (unlike store_bit_field_1),
> which short-circuits the FALLBACK_P parameter.
>
> Tested on x86-64/Linux and SPARC64/Solaris.  Comments?
>
>
> 2012-09-12  Eric Botcazou  
>
> PR rtl-optimization/44194
> * calls.c (expand_call): In the PARALLEL case, copy the return value
> into pseudos instead of spilling it onto the stack.
> * emit-rtl.c (adjust_address_1): Rename ADJUST into ADJUST_ADDRESS and
> add new ADJUST_OBJECT parameter.
> If ADJUST_OBJECT is set, drop the underlying object if it cannot be
> proved that the adjusted memory access is still within its bounds.
> (adjust_automodify_address_1): Adjust call to adjust_address_1.
> (widen_memory_access): Likewise.
> * expmed.c (store_bit_field_1): Call adjust_bitfield_address instead
> of adjust_address.  Do not drop the underlying object of a MEM.
> (store_fixed_bit_field): Likewise.
> (extract_bit_field_1): Likewise.  Fix oversight in recursion.
> (extract_fixed_bit_field): Likewise.
> * expr.h (adjust_address_1): Adjust prototype.
> (adjust_address): Adjust call to adjust_address_1.
> (adjust_address_nv): Likewise.
> (adjust_bitfield_address): New macro.
> (adjust_bitfield_address_nv): Likewise.
> * expr.c (expand_assignment): Handle a PARALLEL in more cases.
> (store_expr): Likewise.
> (store_field): Likewise.
>
> * dse.c: Fix typos in the head comment.

Will it help

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54315
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=28831

Thanks.

-- 
H.J.


Re: [PATCH] Combine location with block using block_locations

2012-09-12 Thread Dehao Chen
There is another bug in the patch (not covered by unittests,
discovered through spec benchmarks).

When we remove unused locals, we do not mark the block as used for
debug stmt, but gimple-streamer-out will still stream out blocks for
debug stmt. There can be 2 fixes:

1.
--- a/gcc/gimple-streamer-out.c
+++ b/gcc/gimple-streamer-out.c
@@ -77,7 +77,8 @@ output_gimple_stmt (struct output_block *ob, gimple stmt)
   lto_output_location (ob, LOCATION_LOCUS (gimple_location (stmt)));

   /* Emit the lexical block holding STMT.  */
-  stream_write_tree (ob, gimple_block (stmt), true);
+  if (!is_gimple_debug (stmt))
+stream_write_tree (ob, gimple_block (stmt), true);

   /* Emit the operands.  */
   switch (gimple_code (stmt))

2.
--- a/gcc/tree-ssa-live.c
+++ b/gcc/tree-ssa-live.c
@@ -726,9 +726,6 @@ remove_unused_locals (void)
  gimple stmt = gsi_stmt (gsi);
  tree b = gimple_block (stmt);

- if (is_gimple_debug (stmt))
-   continue;
-
  if (gimple_clobber_p (stmt))
{
  have_local_clobbers = true;

Either fix could work. Any suggests which one should we go?

Thanks,
Dehao

On Wed, Sep 12, 2012 at 10:05 AM, Dehao Chen  wrote:
> There are two parts that needs memory management:
>
> 1. The BLOCK structure. This is managed by GC. I originally thought
> that removing blocks from tree.gsbase would paralyze GC. This turned
> out not to be a concern because DECL_INITIAL will still mark those
> used tree nodes. This patch may decrease the memory consumption by
> removing blocks from tree/gimple. However, as it makes more blocks
> become used, they also increase the memory consumption.
> 2. The data structure in libcpp that maintains the hashtable for the
> location->block mapping. This is relatively minor because for the
> largest source I've seen, it only maintains less than 100K entries in
> the array (less than 1M total memory consumption). However, as it is a
> global data structure, it may make LTO unhappy. Honza is helping
> testing the memory consumption on LTO (but we first need to make this
> patch work for LTO). If the LTO result turns out ok, we probably don't
> want to put these under GC because: 1. it'll make things much more
> complicated. 2. using self managed memory is more efficient (as this
> is frequently used in many passes). 3. not using GC actually saves
> memory because even though the block is in the map, it can still be
> GCed as soon as it's not reachable from DECL_INITIAL.
>
> I've tested this on some very large C++ files (each one takes more
> than 10s to build), the memory consumption does not see noticeable
> increase/decrease.
>
> Thanks,
> Dehao
>
> On Wed, Sep 12, 2012 at 9:39 AM, Xinliang David Li  wrote:
>> On Wed, Sep 12, 2012 at 2:13 AM, Richard Guenther
>>  wrote:
>>> On Wed, Sep 12, 2012 at 7:06 AM, Dehao Chen  wrote:
 Now I think we are facing a more complex problem. The data structure
 we use to store the location_adhoc_data are file-static in linemap.c
 in libcpp. These data structures are not guarded by GTY(()).
 Meanwhile, as we have removed the block data structure from
 gimple.gsbase as well as tree.exp (encoding them into an location_t).
 This could cause block being GCed and the LOCATION_BLOCK becoming
 dangling pointers.
>>>
>>> Uh.  Note that it is quite important that we are able to garbage-collect 
>>> unused
>>> BLOCKs, this is the whole point of removing unused BLOCK scopes in
>>> remove_unused_locals.  So this indeed becomes much more complicated ...
>>> What would be desired is that the garbage collector can NULL an entry in
>>> the mapping table when it is not referenced in any other way (that other
>>> reference would be the BLOCK tree as stored in a FUNCTION_DECLs 
>>> DECL_INITIAL).
>>
>> It would be nice to GC those unused BLOCKS. I wonder how many BLOCKS
>> are created for a large C++ program. This patch saves memory by
>> shrinking tree size, is it a net win or loss without GC those BLOCKS?
>>
>> thanks,
>>
>> David
>>
>>
>>>
 I tried to manipulate GTY to make it recognize the LOCATION_BLOCK from
 gimple.gsbase.location. However, neigher nested_ptr nor mark_hook can
 help me.

 Another approach would be guard the location_adhoc_data and related
 data structures in GTY(()). However, this is non-trivial because tree
 is not visible in libcpp. At the same time, my implementation heavily
 relies on hashtable to make the code efficient, thus it's quite tricky
 to make "param_is" and "use_params" work.

 The final approach, which I'll try tomorrow, would be move all my
 implementation from libcpp to gcc, and guard them with GTY(()). I
 still haven't thought of any potential problem of this approach. Any
 comments?
>>>
>>> I think moving the mapping to GC in a lazy manner as I described above
>>> would be the way to go.  For hashtables GC already supports if_marked,
>>> not sure if similar support is available for a

Re: Backtrace library [1/3]

2012-09-12 Thread Lawrence Crowl
On 9/12/12, Ian Lance Taylor  wrote:
> On Sep 11, 2012 Lawrence Crowl  wrote:
> > On 9/11/12, Ian Lance Taylor  wrote:
> > > This patch is the interface to and configury of libbacktrace.
> > > I've separated these out as the parts of libbacktrace that
> > > require the most review.  The interface to libbacktrace is in
> > > the file backtrace.h.  This is what callers will use.  The file
> > > backtrace-supported.h is also available so that programs can
> > > see whether calling the backtrace library will work at all.
> >
> > The interface relies on global data in the library.  Wouldn't it
> > be better to expose the state as an additional parameter to
> > enable concurrent access by different threads?  That parameter
> > could then be modeled as 'this' parameter, addressing Gaby's
> > suggesting.
>
> I went ahead and added a state parameter to the interface.
> I've attached the updated patch.

How about typing it as a pointer to an incomplete struct?

extern void *backtrace_create_state (...

becomes, e.g.,

struct backtrace_state;
extern backtrace_state *backtrace_create_state (...

-- 
Lawrence Crowl


Re: Change double_int calls to new interface.

2012-09-12 Thread Lawrence Crowl
On 9/12/12, Mark Kettenis  wrote:
>> Date: Tue, 11 Sep 2012 17:03:39 -0700
>> From: Ian Lance Taylor 
>>
>> On Tue, Sep 11, 2012 at 3:12 PM, Lawrence Crowl 
>> wrote:
>> > On 9/11/12, Andreas Schwab  wrote:
>> >> Mark Kettenis  writes:
>> >>> In file included from ../../../src/gcc/gcc/mcf.c:47:0:
>> >>> ../../../src/gcc/gcc/mcf.c: In function 'void dump_fixup_edge(FILE*,
>> >>> fixup_graph_type*, fixup_edge_p)':
>> >>> ../../../src/gcc/gcc/system.h:288:78: error: integer overflow in
>> >>> expression [-Werror=overflow]
>> >>
>> >> This is PR54528.
>> >
>> > The expression itself looks correct.  I have not been able to
>> > duplicate the problem on x86.  I am now waiting on access to the
>> > compile farm for access to a hppa system.  Does anyone have more
>> > specific information on the condition that generates the error?
>>
>> I haven't tried, but I bet you can get it to happen if you build a
>> 32-bit compiler--that is, build a compiler using -m32 so that the
>> compiler itself runs 32 bit code.
>
> I don't think that helps since I don't see the problem on OpenBSD/i386
> (i386-unknown-openbsd5.2) whith a strictly 32-bit compiler.  As I
> wrote earlier, I suspect the key factor is HOST_WIDE_INT being 32-bit,
> which means 32-bit architectures that don't sey need_64bit_hwint in
> config.gcc.  The fact that m68k is affected as well strengthens my
> suspicion.  So I expect arm to show the problem as well.  But people
> probably haven't noticed since they cross-compile.
>
> Anyway the "diff" below seems to fix the issue.  Guess the replacement
> doesn't work!  Should be a big enough clue for Lawrence to come up
> with a proper fix.
>
> Index: fold-const.c
> ===
> --- fold-const.c  (revision 191150)
> +++ fold-const.c  (working copy)
> @@ -982,13 +982,15 @@
>break;
>
>  case MINUS_EXPR:
> -/* FIXME(crowl) Remove this code if the replacment works.
> +  /* FIXME(crowl) Remove this code if the replacment works.  */
> +#if 1
>neg_double (op2.low, op2.high, &res.low, &res.high);
>add_double (op1.low, op1.high, res.low, res.high,
> &res.low, &res.high);
>overflow = OVERFLOW_SUM_SIGN (res.high, op2.high, op1.high);
> -*/
> +#else
>res = op1.add_with_sign (-op2, false, &overflow);
> +#endif
>break;
>
>  case MULT_EXPR:

My suspicions were on the shift operation.  I will update the last patch
I sent out with a fix.  I will probably need help testing.

-- 
Lawrence Crowl


Loop stride optimization hint

2012-09-12 Thread Jan Hubicka
Hi,
for Fortran one of common reason to inline is because array descriptor is known 
and defines
loop stride.  This patch makes ipa-inline-analysis to notice these cases.

Bootstrapped/regtested x86_64-linux, will commit it tomorrow if there are no 
complains.

Honza

* ipa-inline-analysis.c (dump_inline_hints): Dump loop stride.
(set_hint_predicate): New function.
(reset_inline_summary): Reset loop stride.
(remap_predicate_after_duplication): New function.
(remap_hint_predicate_after_duplication): New function.
(inline_node_duplication_hook): Update.
(dump_inline_summary): Dump stride summaries.
(estimate_function_body_sizes): Compute strides.
(remap_hint_predicate): New function.
(inline_merge_summary): Use it.
(inline_read_section): Read stride.
(inline_write_summary): Write stride.
* ipa-inline.c (want_inline_small_function_p): Handle strides.
(edge_badness): Likewise.
* ipa-inline.h (inline_hints_vals): Add stride hint.
(inline_summary): Update stride.

* gcc.dg/ipa/inlinehint-2.c: New testcase.
Index: ipa-inline-analysis.c
===
*** ipa-inline-analysis.c   (revision 191228)
--- ipa-inline-analysis.c   (working copy)
*** dump_inline_hints (FILE *f, inline_hints
*** 634,639 
--- 634,644 
hints &= ~INLINE_HINT_loop_iterations;
fprintf (f, " loop_iterations");
  }
+   if (hints & INLINE_HINT_loop_stride)
+ {
+   hints &= ~INLINE_HINT_loop_stride;
+   fprintf (f, " loop_stride");
+ }
gcc_assert (!hints);
  }
  
*** edge_set_predicate (struct cgraph_edge *
*** 719,724 
--- 724,749 
  }
  }
  
+ /* Set predicate for hint *P.  */
+ 
+ static void
+ set_hint_predicate (struct predicate **p, struct predicate new_predicate)
+ {
+   if (false_predicate_p (&new_predicate)
+   || true_predicate_p (&new_predicate))
+ {
+   if (*p)
+   pool_free (edge_predicate_pool, *p);
+   *p = NULL;
+ }
+   else
+ {
+   if (!*p)
+   *p = (struct predicate *)pool_alloc (edge_predicate_pool);
+   **p = new_predicate;
+ }
+ }
+ 
  
  /* KNOWN_VALS is partial mapping of parameters of NODE to constant values.
 KNOWN_AGGS is a vector of aggreggate jump functions for each parameter.
*** reset_inline_summary (struct cgraph_node
*** 953,958 
--- 978,988 
pool_free (edge_predicate_pool, info->loop_iterations);
info->loop_iterations = NULL;
  }
+   if (info->loop_stride)
+ {
+   pool_free (edge_predicate_pool, info->loop_stride);
+   info->loop_stride = NULL;
+ }
VEC_free (condition, gc, info->conds);
VEC_free (size_time_entry,gc, info->entry);
for (e = node->callees; e; e = e->next_callee)
*** inline_node_removal_hook (struct cgraph_
*** 975,980 
--- 1005,1056 
memset (info, 0, sizeof (inline_summary_t));
  }
  
+ /* Remap predicate P of former function to be predicate of duplicated 
functoin.
+POSSIBLE_TRUTHS is clause of possible truths in the duplicated node,
+INFO is inline summary of the duplicated node.  */
+ 
+ static struct predicate
+ remap_predicate_after_duplication (struct predicate *p,
+  clause_t possible_truths,
+  struct inline_summary *info)
+ {
+   struct predicate new_predicate = true_predicate ();
+   int j;
+   for (j = 0; p->clause[j]; j++)
+ if (!(possible_truths & p->clause[j]))
+   {
+   new_predicate = false_predicate ();
+   break;
+   }
+ else
+   add_clause (info->conds, &new_predicate,
+ possible_truths & p->clause[j]);
+   return new_predicate;
+ }
+ 
+ /* Same as remap_predicate_after_duplication but handle hint predicate *P.
+Additionally care about allocating new memory slot for updated predicate
+and set it to NULL when it becomes true or false (and thus uninteresting).
+  */
+ 
+ static void
+ remap_hint_predicate_after_duplication (struct predicate **p,
+   clause_t possible_truths,
+   struct inline_summary *info)
+ {
+   struct predicate new_predicate;
+ 
+   if (!*p)
+ return;
+ 
+   new_predicate = remap_predicate_after_duplication (*p,
+possible_truths,
+info);
+   /* We do not want to free previous predicate; it is used by node origin.  */
+   *p = NULL;
+   set_hint_predicate (p, new_predicate);
+ }
+ 
  
  /* Hook that is called by cgraph.c when a node is duplicated.  */
  
*** inline_node_duplication_hook (struct cgr
*** 1042,1057 
 to be true.  */
for (i = 0; VEC_iterate (size_time_entry, entry, i, e); i++)
{
! struct predica

Re: PATCH: PR target/54445: TLS array lookup with negative constant is not combined into a single instruction

2012-09-12 Thread Uros Bizjak
On Wed, Sep 12, 2012 at 7:17 PM, H.J. Lu  wrote:

>> When x86-64 TLS support was added by:
>>
>> http://gcc.gnu.org/ml/gcc-patches/2002-10/msg01262.html
>>
>> it didn't allow negative offset.  Jakub, do you remember the reason for
>> it?  I tested this patch on Linux/x86-64 and used the new GCC to build
>> glibc for x86-64 and x32.  There are no regressions.  OK to install?
>
>> 2012-09-02  H.J. Lu  
>>
>> PR target/54445
>> * config/i386/predicates.md (x86_64_immediate_operand): Allow
>> negative offset for UNSPEC_DTPOFF/UNSPEC_NTPOFF.
>>
>> gcc/testsuite/
>> 2012-09-02  H.J. Lu  
>>
>> PR target/54445
>> * gcc.target/i386/pr54445-1.c: New file.
>> * gcc.target/i386/pr54445-2.c: Likewise.

The spec does not require positive offsets, and truncations will be
detected by the linker anyway.

So, looks OK to me.

Thanks,
Uros.


[4.7 PATCH, i386]: Enable prefetchw for selected AMD targets

2012-09-12 Thread Uros Bizjak
Hello!

Newer AMD processors does not enable 3dNOW anymore. However, prefetchw
depends on TARGET_3DNOW, so it is not generated anymore. Following
patch is a 4.7 version of mainline patch [1].

2012-09-12  Uros Bizjak  

* config/i386/i386.h (x86_prefetchw): New global variable.
(TARGET_PREFETCHW): New macro.
* config/i386/i386.c (PTA_PREFETCHW): Ditto.
(processor_alias_table): Add PTA_PREFETCHW to
bdver1, bdver2 and btver1.
(ix86_option_override_internal): Set x86_prefetchw for
PTA_PREFETCHW targets.
* config/i386/i386.md (prefetch): Expand to prefetchw
for TARGET_PREFETCHW.
(*prefetch_3dnow_): Also enable for TARGET_PREFETCHW.

Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu
{,-m32}. Will be committed to 4.7 branch after [1] is committed to
mainline.

[1] http://gcc.gnu.org/ml/gcc-patches/2012-09/msg00670.html

Uros.
Index: config/i386/i386.c
===
--- config/i386/i386.c  (revision 191227)
+++ config/i386/i386.c  (working copy)
@@ -2428,9 +2428,12 @@ enum processor_type ix86_tune;
 /* Which instruction set architecture to use.  */
 enum processor_type ix86_arch;
 
-/* true if sse prefetch instruction is not NOOP.  */
+/* True if processor has SSE prefetch instruction.  */
 int x86_prefetch_sse;
 
+/* True if processor has prefetchw instruction.  */
+int x86_prefetchw;
+ 
 /* -mstackrealign option */
 static const char ix86_force_align_arg_pointer_string[]
   = "force_align_arg_pointer";
@@ -2931,6 +2934,8 @@ ix86_option_override_internal (bool main_args_p)
 #define PTA_XOP(HOST_WIDE_INT_1 << 29)
 #define PTA_AVX2   (HOST_WIDE_INT_1 << 30)
 #define PTA_BMI2   (HOST_WIDE_INT_1 << 31)
+#define PTA_PREFETCHW  (HOST_WIDE_INT_1 << 32)
+
 /* if this reaches 64, need to widen struct pta flags below */
 
   static struct pta
@@ -3038,19 +3043,19 @@ ix86_option_override_internal (bool main_args_p)
PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
| PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
   {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
-   PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
-   | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
-   | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
-   | PTA_XOP | PTA_LWP},
+   PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
+   | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
+   | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
+   | PTA_FMA4 | PTA_XOP | PTA_LWP},
   {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
-   PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
-   | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
-   | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
-   | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
+   PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
+   | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
+   | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
+   | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
| PTA_FMA},
   {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
-PTA_64BIT | PTA_MMX |  PTA_SSE  | PTA_SSE2 | PTA_SSE3
-| PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16},
+   PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
+   | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
   {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
0 /* flags are only used for -march switch.  */ },
   {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
@@ -3358,6 +3363,8 @@ ix86_option_override_internal (bool main_args_p)
  ix86_isa_flags |= OPTION_MASK_ISA_F16C;
if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
  x86_prefetch_sse = true;
+   if (processor_alias_table[i].flags & PTA_PREFETCHW)
+ x86_prefetchw = true;
 
break;
   }
Index: config/i386/i386.h
===
--- config/i386/i386.h  (revision 191226)
+++ config/i386/i386.h  (working copy)
@@ -450,9 +450,11 @@ extern unsigned char ix86_arch_features[X86_ARCH_L
 #define TARGET_FISTTP  (TARGET_SSE3 && TARGET_80387)
 
 extern int x86_prefetch_sse;
-
 #define TARGET_PREFETCH_SSEx86_prefetch_sse
 
+extern int x86_prefetchw;
+#define TARGET_PREFETCHW   x86_prefetchw
+
 #define ASSEMBLER_DIALECT  (ix86_asm_dialect)
 
 #define TARGET_SSE_MATH((ix86_fpmath & FPMATH_SSE) != 0)
Index: config/i386/i386.md
===
--- config/i386/i386.md (revision 191227)
+++ config/i386/i386.md (working copy)
@@ -17671,12 +17671,14 @@
   gcc_assert (locality >= 0 && locality <= 3);
   gcc_assert (GET_MODE (operands[0]) == Pmode
  || GET_MODE 

Re: [4.7 PATCH, i386]: Enable prefetchw for selected AMD targets

2012-09-12 Thread H.J. Lu
On Wed, Sep 12, 2012 at 11:20 AM, Uros Bizjak  wrote:
> Hello!
>
> Newer AMD processors does not enable 3dNOW anymore. However, prefetchw
> depends on TARGET_3DNOW, so it is not generated anymore. Following
> patch is a 4.7 version of mainline patch [1].
>
> 2012-09-12  Uros Bizjak  
>
> * config/i386/i386.h (x86_prefetchw): New global variable.
> (TARGET_PREFETCHW): New macro.
> * config/i386/i386.c (PTA_PREFETCHW): Ditto.
> (processor_alias_table): Add PTA_PREFETCHW to
> bdver1, bdver2 and btver1.
> (ix86_option_override_internal): Set x86_prefetchw for
> PTA_PREFETCHW targets.
> * config/i386/i386.md (prefetch): Expand to prefetchw
> for TARGET_PREFETCHW.
> (*prefetch_3dnow_): Also enable for TARGET_PREFETCHW.
>
> Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu
> {,-m32}. Will be committed to 4.7 branch after [1] is committed to
> mainline.
>
> [1] http://gcc.gnu.org/ml/gcc-patches/2012-09/msg00670.html

Do we need to update driver-i386.c?

-- 
H.J.


Re: [AArch64] fix missing Dwarf call frame information in the epilogue

2012-09-12 Thread Richard Henderson
On 09/12/2012 09:10 AM, Yufeng Zhang wrote:
> aarch64_set_frame_expr (gen_rtx_SET
> (Pmode,
>  stack_pointer_rtx,
> -gen_rtx_PLUS (Pmode, stack_pointer_rtx,
> +gen_rtx_PLUS (Pmode, cfa_reg,
>GEN_INT (offset;

We'd prefer to use

  plus_constant (Pmode, cfa_reg, offset)

instead of the explicit call to gen_rtx_PLUS and GEN_INT.
It would appear that the entire aarch64.c file ought to 
be audited for that.

Also, use of the REG_CFA_* notes is strongly encouraged over
use of REG_FRAME_RELATED_EXPR.

There's all sorts of work involved in turning R_F_R_E into
R_CFA_* notes, depending on a rather large state machine.
This state machine was developed when only prologues were
annotated for unwinding, and therefore one cannot expect it
to work reliably for epilogues.

A long-term goal is to convert all targets to use R_CFA_*
exclusively, as that preserves much more information present
in the structure of the code of the prologue generator.  It
means less work within the compiler, and eventually being able
to remove a rather large hunk of state-machine code.


r~


Re: [4.7 PATCH, i386]: Enable prefetchw for selected AMD targets

2012-09-12 Thread Uros Bizjak
On Wed, Sep 12, 2012 at 8:30 PM, H.J. Lu  wrote:

>> Newer AMD processors does not enable 3dNOW anymore. However, prefetchw
>> depends on TARGET_3DNOW, so it is not generated anymore. Following
>> patch is a 4.7 version of mainline patch [1].
>>
>> 2012-09-12  Uros Bizjak  
>>
>> * config/i386/i386.h (x86_prefetchw): New global variable.
>> (TARGET_PREFETCHW): New macro.
>> * config/i386/i386.c (PTA_PREFETCHW): Ditto.
>> (processor_alias_table): Add PTA_PREFETCHW to
>> bdver1, bdver2 and btver1.
>> (ix86_option_override_internal): Set x86_prefetchw for
>> PTA_PREFETCHW targets.
>> * config/i386/i386.md (prefetch): Expand to prefetchw
>> for TARGET_PREFETCHW.
>> (*prefetch_3dnow_): Also enable for TARGET_PREFETCHW.
>>
>> Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu
>> {,-m32}. Will be committed to 4.7 branch after [1] is committed to
>> mainline.
>>
>> [1] http://gcc.gnu.org/ml/gcc-patches/2012-09/msg00670.html
>
> Do we need to update driver-i386.c?

No, the patch is not a backport of -mprfchw, but statically enables
prefetchw for targets that have prefetchw, but don't define
TARGET_3DNOW anymore.

Uros.


Re: Backtrace library [1/3]

2012-09-12 Thread Ian Lance Taylor
On Wed, Sep 12, 2012 at 9:23 AM, Joseph S. Myers
 wrote:
> On Tue, 11 Sep 2012, Ian Lance Taylor wrote:
>
>> The configury is fairly standard.  Note that libbacktrace is built as
>> both a host library (to link into the compilers) and as a target library
>> (to link into libgo and possibly other libraries).
>
> Under what circumstances will the library be built for the target - only
> if a relevant language such as Go is being built, or unconditionally?

My intent is to only build it when something needs it, e.g., libgo.  I
don't know if I've expressed that intent correctly.

> If built unconditionally, will the library build OK for the target in
> situations where no target headers are yet available?  (For such builds of
> compilers used to bootstrap libc it would be usual to use various
> --disable- options to disable libraries not needed to build libc, and to
> use --enable-languages=c, but if this library is used on the host side by
> the compiler then the generic --disable-libbacktrace might not suffice
> since that would disable the host copy, required by the compiler itself,
> as well as the target copy.)

The library does currently assume that a few header files are
available, so building it would presumably break in this scenario.  We
could introduce --disable-target-libbacktrace easily enough.

Ian


Re: Backtrace library [1/3]

2012-09-12 Thread Ian Lance Taylor
On Wed, Sep 12, 2012 at 10:31 AM, Lawrence Crowl  wrote:
>
> How about typing it as a pointer to an incomplete struct?
>
> extern void *backtrace_create_state (...
>
> becomes, e.g.,
>
> struct backtrace_state;
> extern backtrace_state *backtrace_create_state (...

Yeah, that is probably the way to do it.  I made that change.  I won't
bother to send around the patches again.

Ian


[PATCH, i386]: Change x86_prefetch_sse to unsigned char

2012-09-12 Thread Uros Bizjak
Hello!

This will match "fake" bool type that is used throughout gcc sources.
We can't use bool there, since the header is used in libgcc which
doesn't include system.h

2012-09-12  Uros Bizjak  

* config/i386/i386.c (x86_prefetch_sse): Change to unsigned char.
* config/i386/i386.h (x86_prefetch_sse): Ditto.

Tested on x86_64-pc-linux-gnu {,-m32} and committed to mainline SVN.

Uros.
Index: config/i386/i386.c
===
--- config/i386/i386.c  (revision 191226)
+++ config/i386/i386.c  (working copy)
@@ -2517,8 +2517,8 @@
 /* Which instruction set architecture to use.  */
 enum processor_type ix86_arch;
 
-/* true if sse prefetch instruction is not NOOP.  */
-int x86_prefetch_sse;
+/* True if processor has SSE prefetch instruction.  */
+unsigned char x86_prefetch_sse;
 
 /* -mstackrealign option */
 static const char ix86_force_align_arg_pointer_string[]
Index: config/i386/i386.h
===
--- config/i386/i386.h  (revision 191224)
+++ config/i386/i386.h  (working copy)
@@ -458,8 +458,7 @@
 
 #define TARGET_FISTTP  (TARGET_SSE3 && TARGET_80387)
 
-extern int x86_prefetch_sse;
-
+extern unsigned char x86_prefetch_sse;
 #define TARGET_PREFETCH_SSEx86_prefetch_sse
 
 #define ASSEMBLER_DIALECT  (ix86_asm_dialect)


Re: Loop stride optimization hint

2012-09-12 Thread Tobias Burnus

Jan Hubicka wrote:

for Fortran one of common reason to inline is because array descriptor is known 
and defines
loop stride.  This patch makes ipa-inline-analysis to notice these cases.


Thanks for your Fortran inlining work.


+ t(int s, void **p)
+ {
+   int i;
+   for (i;i<1;i+=s)
+ p[i]=0;
+ }
+ m(void **p)
+ {
+   t (10);


Shouldn't that be   t(10, p)?

Tobias



Re: [PATCH] Combine location with block using block_locations

2012-09-12 Thread Dehao Chen
Attached is the memory consumption report for a very large source
file. Looks like this patch actually reduced the memory consumption by
2%.

Dehao

On Thu, Sep 13, 2012 at 1:18 AM, Xinliang David Li  wrote:
> On Wed, Sep 12, 2012 at 10:05 AM, Dehao Chen  wrote:
>> There are two parts that needs memory management:
>>
>> 1. The BLOCK structure. This is managed by GC. I originally thought
>> that removing blocks from tree.gsbase would paralyze GC. This turned
>> out not to be a concern because DECL_INITIAL will still mark those
>> used tree nodes. This patch may decrease the memory consumption by
>> removing blocks from tree/gimple. However, as it makes more blocks
>> become used, they also increase the memory consumption.
>
> You mean when you also make the location table GC root.
>
> Can you share the mem-stats information for the large program with and
> without your patch?
>
> thanks,
>
> David
>
>> 2. The data structure in libcpp that maintains the hashtable for the
>> location->block mapping. This is relatively minor because for the
>> largest source I've seen, it only maintains less than 100K entries in
>> the array (less than 1M total memory consumption). However, as it is a
>> global data structure, it may make LTO unhappy. Honza is helping
>> testing the memory consumption on LTO (but we first need to make this
>> patch work for LTO). If the LTO result turns out ok, we probably don't
>> want to put these under GC because: 1. it'll make things much more
>> complicated. 2. using self managed memory is more efficient (as this
>> is frequently used in many passes). 3. not using GC actually saves
>> memory because even though the block is in the map, it can still be
>> GCed as soon as it's not reachable from DECL_INITIAL.
>>
>> I've tested this on some very large C++ files (each one takes more
>> than 10s to build), the memory consumption does not see noticeable
>> increase/decrease.
>>
>> Thanks,
>> Dehao
>>
>> On Wed, Sep 12, 2012 at 9:39 AM, Xinliang David Li  
>> wrote:
>>> On Wed, Sep 12, 2012 at 2:13 AM, Richard Guenther
>>>  wrote:
 On Wed, Sep 12, 2012 at 7:06 AM, Dehao Chen  wrote:
> Now I think we are facing a more complex problem. The data structure
> we use to store the location_adhoc_data are file-static in linemap.c
> in libcpp. These data structures are not guarded by GTY(()).
> Meanwhile, as we have removed the block data structure from
> gimple.gsbase as well as tree.exp (encoding them into an location_t).
> This could cause block being GCed and the LOCATION_BLOCK becoming
> dangling pointers.

 Uh.  Note that it is quite important that we are able to garbage-collect 
 unused
 BLOCKs, this is the whole point of removing unused BLOCK scopes in
 remove_unused_locals.  So this indeed becomes much more complicated ...
 What would be desired is that the garbage collector can NULL an entry in
 the mapping table when it is not referenced in any other way (that other
 reference would be the BLOCK tree as stored in a FUNCTION_DECLs 
 DECL_INITIAL).
>>>
>>> It would be nice to GC those unused BLOCKS. I wonder how many BLOCKS
>>> are created for a large C++ program. This patch saves memory by
>>> shrinking tree size, is it a net win or loss without GC those BLOCKS?
>>>
>>> thanks,
>>>
>>> David
>>>
>>>

> I tried to manipulate GTY to make it recognize the LOCATION_BLOCK from
> gimple.gsbase.location. However, neigher nested_ptr nor mark_hook can
> help me.
>
> Another approach would be guard the location_adhoc_data and related
> data structures in GTY(()). However, this is non-trivial because tree
> is not visible in libcpp. At the same time, my implementation heavily
> relies on hashtable to make the code efficient, thus it's quite tricky
> to make "param_is" and "use_params" work.
>
> The final approach, which I'll try tomorrow, would be move all my
> implementation from libcpp to gcc, and guard them with GTY(()). I
> still haven't thought of any potential problem of this approach. Any
> comments?

 I think moving the mapping to GC in a lazy manner as I described above
 would be the way to go.  For hashtables GC already supports if_marked,
 not sure if similar support is available for arrays/vecs.

 Richard.

> Thanks,
> Dehao
>
> On Tue, Sep 11, 2012 at 9:00 AM, Dehao Chen  wrote:
>> I saw comments in tree-streamer-out.c:
>>
>>   /* Do not stream BLOCK_SOURCE_LOCATION.  We cannot handle debug 
>> information
>>  for early inlining so drop it on the floor instead of ICEing in
>>  dwarf2out.c.  */
>>   streamer_write_chain (ob, BLOCK_VARS (expr), ref_p);
>>
>> However, what the code is doing seemed contradictory with the comment.
>> Or am I missing something?
>>
>>
>>
>> On Tue, Sep 11, 2012 at 8:32 AM, Michael Matz  wrote:
>>> Hi,
>>>
>>> On Tue, 11 Se

[PATCH] Fix PR gcov-profile/54487 (profiledbootstrap intermittent failures) (issue6496113)

2012-09-12 Thread Teresa Johnson
This fixes PR gcov-profile/54487 where the gcda files were not locked
by the profile-use read, enabling writes by other instrumented compiles
to change the profile in the middle of the profile use read. The GCOV_LOCKED
macro was not set because it was guarded by HOST_HAS_F_SETLKW, which was
never set. The fix is to add a compile test in the configure to set it.

Tested with bootstrap and profiledbootstrap on x86_64-unknown-linux-gnu.
Ok for trunk?

Thanks,
Teresa

2012-09-12  Teresa Johnson  

* configure.ac(HOST_HAS_F_SETLKW): Set based on compile
test using F_SETLKW with fcntl.
* configure, config.in: Regenerate.

Index: configure
===
--- configure   (revision 191225)
+++ configure   (working copy)
@@ -10731,6 +10731,41 @@ $as_echo "#define HAVE_CLOCK_T 1" >>confdefs.h
 
 fi
 
+# Check if F_SETLKW is supported by fcntl.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for F_SETLKW" >&5
+$as_echo_n "checking for F_SETLKW... " >&6; }
+if test "${ac_cv_f_setlkw+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#include "fcntl.h"
+
+int
+main ()
+{
+struct flock fl; fl.l_whence = 0; fl.l_start = 0; fl.l_len = 0; fl.l_pid = 0; 
return fcntl (1, F_SETLKW, &fl);
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_f_setlkw=yes
+else
+  ac_cv_f_setlkw=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_f_setlkw" >&5
+$as_echo "$ac_cv_f_setlkw" >&6; }
+if test $ac_cv_f_setlkw = yes; then
+
+$as_echo "#define HOST_HAS_F_SETLKW 1" >>confdefs.h
+
+fi
+
 # Restore CFLAGS, CXXFLAGS from before the gcc_AC_NEED_DECLARATIONS tests.
 CFLAGS="$saved_CFLAGS"
 CXXFLAGS="$saved_CXXFLAGS"
@@ -17742,7 +1,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 17745 "configure"
+#line 17780 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -17848,7 +17883,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 17851 "configure"
+#line 17886 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
Index: config.in
===
--- config.in   (revision 191225)
+++ config.in   (working copy)
@@ -1600,6 +1600,12 @@
 #endif
 
 
+/* Define if F_SETLKW supported by fcntl. */
+#ifndef USED_FOR_TARGET
+#undef HOST_HAS_F_SETLKW
+#endif
+
+
 /* Define as const if the declaration of iconv() needs const. */
 #ifndef USED_FOR_TARGET
 #undef ICONV_CONST
Index: configure.ac
===
--- configure.ac(revision 191225)
+++ configure.ac(working copy)
@@ -1159,6 +1159,16 @@ if test $gcc_cv_type_clock_t = yes; then
   [Define if  defines clock_t.])
 fi
 
+# Check if F_SETLKW is supported by fcntl.
+AC_CACHE_CHECK(for F_SETLKW, ac_cv_f_setlkw, [
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+#include "fcntl.h"
+]], [[struct flock fl; fl.l_whence = 0; fl.l_start = 0; fl.l_len = 0; fl.l_pid 
= 0; return fcntl (1, F_SETLKW, 
&fl);]])],[ac_cv_f_setlkw=yes],[ac_cv_f_setlkw=no])])
+if test $ac_cv_f_setlkw = yes; then
+  AC_DEFINE(HOST_HAS_F_SETLKW, 1,
+  [Define if F_SETLKW supported by fcntl.])
+fi
+
 # Restore CFLAGS, CXXFLAGS from before the gcc_AC_NEED_DECLARATIONS tests.
 CFLAGS="$saved_CFLAGS"
 CXXFLAGS="$saved_CXXFLAGS"

--
This patch is available for review at http://codereview.appspot.com/6496113


Re: [PATCH] Fix PR gcov-profile/54487 (profiledbootstrap intermittent failures) (issue6496113)

2012-09-12 Thread Jakub Jelinek
On Wed, Sep 12, 2012 at 01:45:12PM -0700, Teresa Johnson wrote:
> This fixes PR gcov-profile/54487 where the gcda files were not locked
> by the profile-use read, enabling writes by other instrumented compiles
> to change the profile in the middle of the profile use read. The GCOV_LOCKED
> macro was not set because it was guarded by HOST_HAS_F_SETLKW, which was
> never set. The fix is to add a compile test in the configure to set it.
> 
> Tested with bootstrap and profiledbootstrap on x86_64-unknown-linux-gnu.
> Ok for trunk?
> 
> Thanks,
> Teresa
> 
> 2012-09-12  Teresa Johnson  
> 

Please include
PR gcov-profile/54487
here in the ChangeLog entry.

>   * configure.ac(HOST_HAS_F_SETLKW): Set based on compile

Space before (.

>   test using F_SETLKW with fcntl.
>   * configure, config.in: Regenerate.
> 
> --- configure.ac  (revision 191225)
> +++ configure.ac  (working copy)
> @@ -1159,6 +1159,16 @@ if test $gcc_cv_type_clock_t = yes; then
>[Define if  defines clock_t.])
>  fi
>  
> +# Check if F_SETLKW is supported by fcntl.
> +AC_CACHE_CHECK(for F_SETLKW, ac_cv_f_setlkw, [
> +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
> +#include "fcntl.h"

Please use
#include 
instead.

> +]], [[struct flock fl; fl.l_whence = 0; fl.l_start = 0; fl.l_len = 0; 
> fl.l_pid = 0; return fcntl (1, F_SETLKW, 
> &fl);]])],[ac_cv_f_setlkw=yes],[ac_cv_f_setlkw=no])])

And split this overlong line, there is no reason why you can't use a newline
e.g. after every ; in the test proglet.

> +if test $ac_cv_f_setlkw = yes; then
> +  AC_DEFINE(HOST_HAS_F_SETLKW, 1,
> +  [Define if F_SETLKW supported by fcntl.])
> +fi
> +
>  # Restore CFLAGS, CXXFLAGS from before the gcc_AC_NEED_DECLARATIONS tests.
>  CFLAGS="$saved_CFLAGS"
>  CXXFLAGS="$saved_CXXFLAGS"

Ok for trunk with those changes, IMHO it would be worthwhile to put this
into 4.7 too, I've seen several unexplained profiledbootstrap errors on
that branch in the past already when using make -jN with high N.

Jakub


Re: [PATCH] Combine location with block using block_locations

2012-09-12 Thread Xinliang David Li
For the largest bucket (size==80), the size reduction is 20%. Not bad.

David

On Wed, Sep 12, 2012 at 1:44 PM, Dehao Chen  wrote:
> Attached is the memory consumption report for a very large source
> file. Looks like this patch actually reduced the memory consumption by
> 2%.
>
> Dehao
>
> On Thu, Sep 13, 2012 at 1:18 AM, Xinliang David Li  wrote:
>> On Wed, Sep 12, 2012 at 10:05 AM, Dehao Chen  wrote:
>>> There are two parts that needs memory management:
>>>
>>> 1. The BLOCK structure. This is managed by GC. I originally thought
>>> that removing blocks from tree.gsbase would paralyze GC. This turned
>>> out not to be a concern because DECL_INITIAL will still mark those
>>> used tree nodes. This patch may decrease the memory consumption by
>>> removing blocks from tree/gimple. However, as it makes more blocks
>>> become used, they also increase the memory consumption.
>>
>> You mean when you also make the location table GC root.
>>
>> Can you share the mem-stats information for the large program with and
>> without your patch?
>>
>> thanks,
>>
>> David
>>
>>> 2. The data structure in libcpp that maintains the hashtable for the
>>> location->block mapping. This is relatively minor because for the
>>> largest source I've seen, it only maintains less than 100K entries in
>>> the array (less than 1M total memory consumption). However, as it is a
>>> global data structure, it may make LTO unhappy. Honza is helping
>>> testing the memory consumption on LTO (but we first need to make this
>>> patch work for LTO). If the LTO result turns out ok, we probably don't
>>> want to put these under GC because: 1. it'll make things much more
>>> complicated. 2. using self managed memory is more efficient (as this
>>> is frequently used in many passes). 3. not using GC actually saves
>>> memory because even though the block is in the map, it can still be
>>> GCed as soon as it's not reachable from DECL_INITIAL.
>>>
>>> I've tested this on some very large C++ files (each one takes more
>>> than 10s to build), the memory consumption does not see noticeable
>>> increase/decrease.
>>>
>>> Thanks,
>>> Dehao
>>>
>>> On Wed, Sep 12, 2012 at 9:39 AM, Xinliang David Li  
>>> wrote:
 On Wed, Sep 12, 2012 at 2:13 AM, Richard Guenther
  wrote:
> On Wed, Sep 12, 2012 at 7:06 AM, Dehao Chen  wrote:
>> Now I think we are facing a more complex problem. The data structure
>> we use to store the location_adhoc_data are file-static in linemap.c
>> in libcpp. These data structures are not guarded by GTY(()).
>> Meanwhile, as we have removed the block data structure from
>> gimple.gsbase as well as tree.exp (encoding them into an location_t).
>> This could cause block being GCed and the LOCATION_BLOCK becoming
>> dangling pointers.
>
> Uh.  Note that it is quite important that we are able to garbage-collect 
> unused
> BLOCKs, this is the whole point of removing unused BLOCK scopes in
> remove_unused_locals.  So this indeed becomes much more complicated ...
> What would be desired is that the garbage collector can NULL an entry in
> the mapping table when it is not referenced in any other way (that other
> reference would be the BLOCK tree as stored in a FUNCTION_DECLs 
> DECL_INITIAL).

 It would be nice to GC those unused BLOCKS. I wonder how many BLOCKS
 are created for a large C++ program. This patch saves memory by
 shrinking tree size, is it a net win or loss without GC those BLOCKS?

 thanks,

 David


>
>> I tried to manipulate GTY to make it recognize the LOCATION_BLOCK from
>> gimple.gsbase.location. However, neigher nested_ptr nor mark_hook can
>> help me.
>>
>> Another approach would be guard the location_adhoc_data and related
>> data structures in GTY(()). However, this is non-trivial because tree
>> is not visible in libcpp. At the same time, my implementation heavily
>> relies on hashtable to make the code efficient, thus it's quite tricky
>> to make "param_is" and "use_params" work.
>>
>> The final approach, which I'll try tomorrow, would be move all my
>> implementation from libcpp to gcc, and guard them with GTY(()). I
>> still haven't thought of any potential problem of this approach. Any
>> comments?
>
> I think moving the mapping to GC in a lazy manner as I described above
> would be the way to go.  For hashtables GC already supports if_marked,
> not sure if similar support is available for arrays/vecs.
>
> Richard.
>
>> Thanks,
>> Dehao
>>
>> On Tue, Sep 11, 2012 at 9:00 AM, Dehao Chen  wrote:
>>> I saw comments in tree-streamer-out.c:
>>>
>>>   /* Do not stream BLOCK_SOURCE_LOCATION.  We cannot handle debug 
>>> information
>>>  for early inlining so drop it on the floor instead of ICEing in
>>>  dwarf2out.c.  */
>>>   streamer_write_chain (ob, BLOCK_VARS (expr), ref_

Re: [PATCH] Prevent cselib substitution of FP, SP, SFP

2012-09-12 Thread Carrot Wei
Hi Jakub

The same problem also affects gcc4.6,
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54398. Could this be
ported to 4.6 branch?

thanks
Carrot

On Mon, Feb 13, 2012 at 11:54 AM, Jakub Jelinek  wrote:
>
> On Wed, Jan 04, 2012 at 05:21:38PM +, Marcus Shawcroft wrote:
> > Alias analysis by DSE based on CSELIB expansion assumes that
> > references to the stack frame from different base registers (ie FP, SP)
> > never alias.
> >
> > The comment block in cselib explains that cselib does not allow
> > substitution of FP, SP or SFP specifically in order not to break DSE.
>
> Looks reasonable, appart from coding style (no spaces around -> and
> no {} around return p->loc;), I just wonder if having a separate
> loop in expand_loc just for this isn't too expensive.  On sane targets
> IMHO hard frame pointer in the prologue should be initialized from sp, not
> the other way around, thus hard frame pointer based VALUEs should have
> hard frame pointer earlier in the locs list (when there is
> hfp = sp (+ optionally some const)
> insn, we first cselib_lookup_from_insn the rhs and add to locs
> of the new VALUE (plus (VALUE of sp) (const_int)), then process the
> lhs and add it to locs, moving the plus to locs->next).
> So I think the following patch could be enough (bootstrapped/regtested
> on x86_64-linux and i686-linux).
> There is AVR though, which has really weirdo prologue - PR50063,
> but I think it should just use UNSPEC for that or something similar,
> setting sp from hfp seems unnecessary and especially for values with long
> locs chains could make cselib more expensive.
>
> Richard, what do you think about this?
>
> 2012-02-13  Jakub Jelinek  
>
> * cselib.c (expand_loc): Return sp, fp, hfp or cfa base reg right
> away if seen.
>
> --- gcc/cselib.c.jj 2012-02-13 11:07:15.0 +0100
> +++ gcc/cselib.c2012-02-13 18:15:17.531776145 +0100
> @@ -1372,8 +1372,18 @@ expand_loc (struct elt_loc_list *p, stru
>unsigned int regno = UINT_MAX;
>struct elt_loc_list *p_in = p;
>
> -  for (; p; p = p -> next)
> +  for (; p; p = p->next)
>  {
> +  /* Return these right away to avoid returning stack pointer based
> +expressions for frame pointer and vice versa, which is something
> +that would confuse DSE.  See the comment in
> cselib_expand_value_rtx_1
> +for more details.  */
> +  if (REG_P (p->loc)
> + && (REGNO (p->loc) == STACK_POINTER_REGNUM
> + || REGNO (p->loc) == FRAME_POINTER_REGNUM
> + || REGNO (p->loc) == HARD_FRAME_POINTER_REGNUM
> + || REGNO (p->loc) == cfa_base_preserved_regno))
> +   return p->loc;
>/* Avoid infinite recursion trying to expand a reg into a
>  the same reg.  */
>if ((REG_P (p->loc))
>
>
> Jakub


Re: Backtrace library [1/3]

2012-09-12 Thread Joseph S. Myers
On Wed, 12 Sep 2012, Ian Lance Taylor wrote:

> On Wed, Sep 12, 2012 at 9:23 AM, Joseph S. Myers
>  wrote:
> > On Tue, 11 Sep 2012, Ian Lance Taylor wrote:
> >
> >> The configury is fairly standard.  Note that libbacktrace is built as
> >> both a host library (to link into the compilers) and as a target library
> >> (to link into libgo and possibly other libraries).
> >
> > Under what circumstances will the library be built for the target - only
> > if a relevant language such as Go is being built, or unconditionally?
> 
> My intent is to only build it when something needs it, e.g., libgo.  I
> don't know if I've expressed that intent correctly.

I think that if a library is listed in target_libs in config-lang.in for 
at least one language, and if all languages with it so listed are 
disabled, then that library will be disabled - but if no languages list it 
in config-lang.in, it will be enabled by default.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH] Fix PR gcov-profile/54487 (profiledbootstrap intermittent failures) (issue6496113)

2012-09-12 Thread Teresa Johnson
On Wed, Sep 12, 2012 at 1:54 PM, Jakub Jelinek  wrote:
> On Wed, Sep 12, 2012 at 01:45:12PM -0700, Teresa Johnson wrote:
>> This fixes PR gcov-profile/54487 where the gcda files were not locked
>> by the profile-use read, enabling writes by other instrumented compiles
>> to change the profile in the middle of the profile use read. The GCOV_LOCKED
>> macro was not set because it was guarded by HOST_HAS_F_SETLKW, which was
>> never set. The fix is to add a compile test in the configure to set it.
>>
>> Tested with bootstrap and profiledbootstrap on x86_64-unknown-linux-gnu.
>> Ok for trunk?
>>
>> Thanks,
>> Teresa
>>
>> 2012-09-12  Teresa Johnson  
>>
>
> Please include
> PR gcov-profile/54487
> here in the ChangeLog entry.
>
>>   * configure.ac(HOST_HAS_F_SETLKW): Set based on compile
>
> Space before (.
>
>>   test using F_SETLKW with fcntl.
>>   * configure, config.in: Regenerate.
>>
>> --- configure.ac  (revision 191225)
>> +++ configure.ac  (working copy)
>> @@ -1159,6 +1159,16 @@ if test $gcc_cv_type_clock_t = yes; then
>>[Define if  defines clock_t.])
>>  fi
>>
>> +# Check if F_SETLKW is supported by fcntl.
>> +AC_CACHE_CHECK(for F_SETLKW, ac_cv_f_setlkw, [
>> +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
>> +#include "fcntl.h"
>
> Please use
> #include 
> instead.
>
>> +]], [[struct flock fl; fl.l_whence = 0; fl.l_start = 0; fl.l_len = 0; 
>> fl.l_pid = 0; return fcntl (1, F_SETLKW, 
>> &fl);]])],[ac_cv_f_setlkw=yes],[ac_cv_f_setlkw=no])])
>
> And split this overlong line, there is no reason why you can't use a newline
> e.g. after every ; in the test proglet.
>
>> +if test $ac_cv_f_setlkw = yes; then
>> +  AC_DEFINE(HOST_HAS_F_SETLKW, 1,
>> +  [Define if F_SETLKW supported by fcntl.])
>> +fi
>> +
>>  # Restore CFLAGS, CXXFLAGS from before the gcc_AC_NEED_DECLARATIONS tests.
>>  CFLAGS="$saved_CFLAGS"
>>  CXXFLAGS="$saved_CXXFLAGS"
>
> Ok for trunk with those changes, IMHO it would be worthwhile to put this
> into 4.7 too, I've seen several unexplained profiledbootstrap errors on
> that branch in the past already when using make -jN with high N.

Ok, thanks. I will fix the issues you pointed out above and retest
before committing. I'll prepare a backport patch for 4.7 as well.

Teresa

>
> Jakub



-- 
Teresa Johnson | Software Engineer | tejohn...@google.com | 408-460-2413


Re: shrink-wrapping duplicates BBs across partitions.

2012-09-12 Thread Andrew Pinski
On Wed, Sep 12, 2012 at 4:47 AM, Christian Bruel  wrote:
> The problem stems from tree-ssa-tail-merge that breaks bb->count, The
> CFG looks like
>
>  2
>/  \
>   /6
>  5 (0) |
>  | 3 <-
>  |/   \   |
>  |   7 (1)  8 -
>  | /
>   4 (1)
>
> (in parenthesis the bb->count from gcov)
>
>  2
>/  \
>   /6
>  /  |
>  |  3 <--
>  | / |  |
>  5 (0)   8 --
>  |
>  |
>   4 (1)
>
> so 5 and 4 are now in different partitions, producing an assertion
> because there is no EDGE_CROSSING between them.
>
> I can see 3 solutions to this
>
> 1) merge the BB counts in tree-ssa-tail-merge.c, so 5 is in the same
> partition that 4

This looks correct as we already do that for the frequency.

>
> 2) don't tail-merge blocks that belong to different partitions.

I don't think you detect this on the tree level as the partitioning
has not happened yet.

Thanks,
Andrew Pinski

>
> 3) add a EDGE_CROSSING flag on the edge between 4 and 5.
>
> 1) fixes the problem, so 5 and 4 are now in the same partition. The fix
> is quite trivial, as with attached.
>
> the other solution 2) is more conservative, and also fixes the problem.
>
> I don't think 3) is necessary.
>
> more ideas ?
>
> thanks,
>
> Christian
>
>
> On 09/11/2012 06:21 PM, Jakub Jelinek wrote:
>> On Tue, Sep 11, 2012 at 05:40:30PM +0200, Steven Bosscher wrote:
>>> On Tue, Sep 11, 2012 at 5:31 PM, Christian Bruel  
>>> wrote:
 Actually, the edge is fairly simple. I have

 BB5 (BB_COLD_PARTITION) -> BB10 (BB_HOT_PARTITION) -> EXIT

 and BB10 has no other incoming edges. and we are duplicating it.
>>>
>>> That is wrong, should never happen. Is there a test case to play with?
>>> It'd be good to have a PR for this.
>>
>> Isn't that the standard case when !HAVE_return ?  Then you can have only a
>> single return through epilogue, and when the epilogue is in the hot
>> partition, even if cold code is returning, it needs to jump to the epilogue.
>>
>>   Jakub
>>


Loop stride inline hint

2012-09-12 Thread Jan Hubicka
Hi,
this patch makes inliner to realize that it is good idea to inline when loop
stride becomes constant. This is mostly to help fortran testcases where
it is important to inline to get array descriptors.

Bootstrapped/regtested x86_64-linux, comitted.

Honza

* ipa-inline-analysis.c (dump_inline_hints): Dump loop stride.
(set_hint_predicate): New function.
(reset_inline_summary): Reset loop stride.
(remap_predicate_after_duplication): New function.
(remap_hint_predicate_after_duplication): New function.
(inline_node_duplication_hook): Update.
(dump_inline_summary): Dump stride summaries.
(estimate_function_body_sizes): Compute strides.
(remap_hint_predicate): New function.
(inline_merge_summary): Use it.
(inline_read_section): Read stride.
(inline_write_summary): Write stride.
* ipa-inline.c (want_inline_small_function_p): Handle strides.
(edge_badness): Likewise.
* ipa-inline.h (inline_hints_vals): Add stride hint.
(inline_summary): Update stride.

* gcc.dg/ipa/inlinehint-2.c: New testcase.
Index: ipa-inline.c
===
*** ipa-inline.c(revision 191228)
--- ipa-inline.c(working copy)
*** want_inline_small_function_p (struct cgr
*** 481,487 
else if (DECL_DECLARED_INLINE_P (callee->symbol.decl)
   && growth >= MAX_INLINE_INSNS_SINGLE
   && !(hints & (INLINE_HINT_indirect_call
!| INLINE_HINT_loop_iterations)))
{
e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
  want_inline = false;
--- 481,488 
else if (DECL_DECLARED_INLINE_P (callee->symbol.decl)
   && growth >= MAX_INLINE_INSNS_SINGLE
   && !(hints & (INLINE_HINT_indirect_call
!| INLINE_HINT_loop_iterations
!| INLINE_HINT_loop_stride)))
{
e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
  want_inline = false;
*** want_inline_small_function_p (struct cgr
*** 533,539 
 inlining given function is very profitable.  */
else if (!DECL_DECLARED_INLINE_P (callee->symbol.decl)
   && growth >= ((hints & (INLINE_HINT_indirect_call
!  | INLINE_HINT_loop_iterations))
 ? MAX (MAX_INLINE_INSNS_AUTO,
MAX_INLINE_INSNS_SINGLE)
 : MAX_INLINE_INSNS_AUTO))
--- 534,541 
 inlining given function is very profitable.  */
else if (!DECL_DECLARED_INLINE_P (callee->symbol.decl)
   && growth >= ((hints & (INLINE_HINT_indirect_call
!  | INLINE_HINT_loop_iterations
!  | INLINE_HINT_loop_stride))
 ? MAX (MAX_INLINE_INSNS_AUTO,
MAX_INLINE_INSNS_SINGLE)
 : MAX_INLINE_INSNS_AUTO))
*** edge_badness (struct cgraph_edge *edge, 
*** 866,872 
fprintf (dump_file, "Badness overflow\n");
}
if (hints & (INLINE_HINT_indirect_call
!  | INLINE_HINT_loop_iterations))
badness /= 8;
if (dump)
{
--- 868,875 
fprintf (dump_file, "Badness overflow\n");
}
if (hints & (INLINE_HINT_indirect_call
!  | INLINE_HINT_loop_iterations
!  | INLINE_HINT_loop_stride))
badness /= 8;
if (dump)
{
Index: ipa-inline.h
===
*** ipa-inline.h(revision 191228)
--- ipa-inline.h(working copy)
*** typedef struct GTY(()) condition
*** 46,52 
 They are represtented as bitmap of the following values.  */
  enum inline_hints_vals {
INLINE_HINT_indirect_call = 1,
!   INLINE_HINT_loop_iterations = 2
  };
  typedef int inline_hints;
  
--- 46,53 
 They are represtented as bitmap of the following values.  */
  enum inline_hints_vals {
INLINE_HINT_indirect_call = 1,
!   INLINE_HINT_loop_iterations = 2,
!   INLINE_HINT_loop_stride = 4
  };
  typedef int inline_hints;
  
*** struct GTY(()) inline_summary
*** 120,128 
conditions conds;
VEC(size_time_entry,gc) *entry;
  
!   /* Predicate on when some loop in the function sbecomes to have known
   bounds.   */
struct predicate * GTY((skip)) loop_iterations;
  };
  
  
--- 121,132 
conditions conds;
VEC(size_time_entry,gc) *entry;
  
!   /* Predicate on when some loop in the function becomes to have known
   bounds.   */
struct predicate * GTY((skip)) loop_iterations;
+   /* Predicate on when some loop in the function becomes to have known
+  stride.   */
+   st

Re: Loop stride optimization hint

2012-09-12 Thread Martin Jambor
Hi,

On Wed, Sep 12, 2012 at 07:57:16PM +0200, Jan Hubicka wrote:
> Hi,
> for Fortran one of common reason to inline is because array descriptor is 
> known and defines
> loop stride.  This patch makes ipa-inline-analysis to notice these cases.
> 
> Bootstrapped/regtested x86_64-linux, will commit it tomorrow if there are no 
> complains.
> 
> Honza
> 
>   * ipa-inline-analysis.c (dump_inline_hints): Dump loop stride.
>   (set_hint_predicate): New function.
>   (reset_inline_summary): Reset loop stride.
>   (remap_predicate_after_duplication): New function.
>   (remap_hint_predicate_after_duplication): New function.
>   (inline_node_duplication_hook): Update.
>   (dump_inline_summary): Dump stride summaries.
>   (estimate_function_body_sizes): Compute strides.
>   (remap_hint_predicate): New function.
>   (inline_merge_summary): Use it.
>   (inline_read_section): Read stride.
>   (inline_write_summary): Write stride.
>   * ipa-inline.c (want_inline_small_function_p): Handle strides.
>   (edge_badness): Likewise.
>   * ipa-inline.h (inline_hints_vals): Add stride hint.
>   (inline_summary): Update stride.
> 
>   * gcc.dg/ipa/inlinehint-2.c: New testcase.

...

> *** estimate_function_body_sizes (struct cgr
> *** 2390,2395 
> --- 2442,2448 
> struct loop *loop;
> loop_iterator li;
> predicate loop_iterations = true_predicate ();
> +   predicate loop_stride = true_predicate ();
>   
> if (dump_file && (dump_flags & TDF_DETAILS))
>   flow_loops_dump (dump_file, NULL, 0);
> *** estimate_function_body_sizes (struct cgr
> *** 2398,2405 
>   {
> VEC (edge, heap) *exits;
> edge ex;
> !   unsigned int j;
> struct tree_niter_desc niter_desc;
>   
> exits = get_loop_exit_edges (loop);
> FOR_EACH_VEC_ELT (edge, exits, j, ex)
> --- 2451,2459 
>   {
> VEC (edge, heap) *exits;
> edge ex;
> !   unsigned int j, i;
> struct tree_niter_desc niter_desc;
> +   basic_block *body = get_loop_body (loop);
>   
> exits = get_loop_exit_edges (loop);
> FOR_EACH_VEC_ELT (edge, exits, j, ex)
> *** estimate_function_body_sizes (struct cgr
> *** 2416,2427 
> loop_iterations = and_predicates (info->conds, 
> &loop_iterations, &will_be_nonconstant);
> }
> VEC_free (edge, heap, exits);
>   }
> !   if (!true_predicate_p (&loop_iterations))
> ! {
> !   inline_summary (node)->loop_iterations = (struct predicate 
> *)pool_alloc (edge_predicate_pool);
> !   *inline_summary (node)->loop_iterations = loop_iterations;
> ! }
> scev_finalize ();
>   }
> inline_summary (node)->self_time = time;
> --- 2470,2508 
> loop_iterations = and_predicates (info->conds, 
> &loop_iterations, &will_be_nonconstant);
> }
> VEC_free (edge, heap, exits);
> + 
> +   for (i = 0; i < loop->num_nodes; i++)
> + {
> +   gimple_stmt_iterator gsi;
> +   for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next 
> (&gsi))
> + {
> +   gimple stmt = gsi_stmt (gsi);
> +   affine_iv iv;
> +   ssa_op_iter iter;
> +   tree use;
> + 
> +   FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
> + {
> +   predicate will_be_nonconstant;
> + 
> +   if (!simple_iv (loop, loop_containing_stmt (stmt), use, 
> &iv, true)
> +   || is_gimple_min_invariant (iv.step))
> + continue;
> +   will_be_nonconstant
> += will_be_nonconstant_expr_predicate (parms_info, info,
> +  iv.step, 
> nonconstant_names);
> +   if (!true_predicate_p (&will_be_nonconstant)
> +   && !false_predicate_p (&will_be_nonconstant))
> + /* This is slightly inprecise.  We may want to 
> represent each loop with
> +independent predicate.  */
> + loop_stride = and_predicates (info->conds, 
> &loop_stride, &will_be_nonconstant);
> + }
> + }
> + }
> +   free (body);
>   }
> !   set_hint_predicate (&inline_summary (node)->loop_iterations, 
> loop_iterations);
> !   set_hint_predicate (&inline_summary (node)->loop_stride, loop_stride);
> scev_finalize ();
>   }
> inline_summary (node)->self_time = time;

Well, I know i's not that important but some lines are clearly wider
than 80 characters and makes it more difficult to read for people like
me.  In fact, the already committed loop_iterations computation also
often exceeds the limit.

I have also been wondering whether we

Re: [PATCH] limited C++ parsing support for gengtype

2012-09-12 Thread Gabriel Dos Reis
On Wed, Sep 12, 2012 at 4:54 PM, Aaron Gray  wrote:
> On 11 September 2012 23:45, Gabriel Dos Reis
>  wrote:
>> On Tue, Sep 11, 2012 at 3:41 PM, Diego Novillo  wrote:
>>
 @@ -778,6 +791,7 @@ type (options_p *optsp, bool nested)
 return resolve_typedef (s, &lexer_line);

   case STRUCT:
 +case CLASS:
>>>
>>>
>>> I think that as far as gengtype is concerned, 'struct' and 'class' should be
>>> exactly the same thing.  So, all the handling for 'CLASS' you added should
>>> not be needed.
>>
>>
>> 100% agreed.
>
> The reason I included a CLASS type distinct from STRUCT was for
> reporting debugging information when I get to that stage.

In general, patches are easier to assess when they are self-contained
(e.g. each change is justified by the purpose of the patch.)   My recommendation
would be for your patches to focus on one topic at a time.

-- Gaby


Re: Loop stride optimization hint

2012-09-12 Thread Jan Hubicka
> > !   set_hint_predicate (&inline_summary (node)->loop_iterations, 
> > loop_iterations);
> > !   set_hint_predicate (&inline_summary (node)->loop_stride, 
> > loop_stride);
> > scev_finalize ();
> >   }
> > inline_summary (node)->self_time = time;
> 
> Well, I know i's not that important but some lines are clearly wider
> than 80 characters and makes it more difficult to read for people like
> me.  In fact, the already committed loop_iterations computation also
> often exceeds the limit.

Yeah, I will reindent that.
> 
> I have also been wondering whether we really want to and all the
> different will_be_nonconstant predicates to produce the final hint
> predicates.  IIUC we won't get the hint unless all loop
> iterations/strides are known.  The loop iterations predicate in the
> fairly simple pr48636.f90 is:
> 
>   loop iterations:(op0[ref offset: 192] changed || op0[ref offset: 256] 
> changed || op0[ref offset: 224] changed) && (op0[ref offset: 96] changed || 
> op0[ref offset: 160] changed || op0[ref offset: 128] changed)
> 
> With predicates like these, it's going to be very difficult for IPA-CP
> to put together a combination of known values and aggregate contents
> for just some contexts so that we hit the sweet spot.  The current
> approach of trying one known context-specific value at a time
> certainly will not work in real cases, at the same time, it is really
> built around independent assessments of the values...

Well, I intentionally made the code to make it easy to drop multiple
predicates, i.e. simply replace current predicate pointer by vector of
independent predicates, so each loop can be handled independently.

In the case of pr48636.f90 I would however expect
that we should be able to figure out tha tthe whole loop descriptor
is constant and match the above as true?

Honza


[PING^2] C++ conversion - pull in cstdlib

2012-09-12 Thread Oleg Endo
Hello,

On Sun, 2012-09-02 at 01:19 +0200, Oleg Endo wrote:
> On Sat, 2012-09-01 at 18:25 +0200, Oleg Endo wrote:
> > On Sat, 2012-09-01 at 16:17 +, Joseph S. Myers wrote:
> > > On Sat, 1 Sep 2012, Oleg Endo wrote:
> > > 
> > > > Ping!
> > > > 
> > > > This allows one to include e.g.  in GCC source files.
> > > > Since the switch to C++ has been made, this should be OK to do now, I
> > > > guess.
> > > 
> > > This is not a review, but have you tested building the Ada front end with 
> > > this patch applied?  Given recent issues relating to how Ada uses 
> > > system.h, I think any such changes need testing for Ada.
> > > 
> > 
> > No I haven't. C and C++ only. Good to know, thanks.  Will try.
> > 
> 
> OK, now I have. ada, c, c++, fortran, go, java, objc, obj-c++ do build
> here.
> 

Would it be OK to install the patch originally posted here:
http://gcc.gnu.org/ml/gcc-patches/2012-08/msg01761.html ?

If not OK, it's also fine.  Just let me know.  It seems the issue can be
worked around in individual source files by including  before
"system.h" (as it is already done in config/sh/sh.c).

Cheers,
Oleg





Re: [patch] Finish double_int conversion.

2012-09-12 Thread Eric Botcazou
> Index: gcc/config/sparc/sparc.c
> ===
> --- gcc/config/sparc/sparc.c  (revision 191083)
> +++ gcc/config/sparc/sparc.c  (working copy)
> @@ -10113,33 +10113,27 @@ sparc_fold_builtin (tree fndecl, int n_a
> && TREE_CODE (arg1) == VECTOR_CST
> && TREE_CODE (arg2) == INTEGER_CST)
>   {
> -   int overflow = 0;
> -   unsigned HOST_WIDE_INT low = TREE_INT_CST_LOW (arg2);
> -   HOST_WIDE_INT high = TREE_INT_CST_HIGH (arg2);
> +   bool overflow = false;
> +   double_int di_arg2 = TREE_INT_CST (arg2);
> unsigned i;
> 
> for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
>   {
> -   unsigned HOST_WIDE_INT
> - low0 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i)),
> - low1 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg1, i));
> -   HOST_WIDE_INT
> - high0 = TREE_INT_CST_HIGH (VECTOR_CST_ELT (arg0, i));
> -   HOST_WIDE_INT
> - high1 = TREE_INT_CST_HIGH (VECTOR_CST_ELT (arg1, i));
> +   double_int e0 = TREE_INT_CST (VECTOR_CST_ELT (arg0, i));
> +   double_int e1 = TREE_INT_CST (VECTOR_CST_ELT (arg1, i));
> 
> -   unsigned HOST_WIDE_INT l;
> -   HOST_WIDE_INT h;
> +   bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
> 
> -   overflow |= neg_double (low1, high1, &l, &h);
> -   overflow |= add_double (low0, high0, l, h, &l, &h);
> -   if (h < 0)
> - overflow |= neg_double (l, h, &l, &h);
> +   double_int tmp = e1.neg_with_overflow (&neg1_ovf);
> +   tmp = e0.add_with_sign (tmp, false, &add1_ovf);
> +   if (tmp.is_negative ())
> + tmp = tmp.neg_with_overflow (&neg2_ovf);
> 
> -   overflow |= add_double (low, high, l, h, &low, &high);
> +   tmp = di_arg2.add_with_sign (tmp, false, &add2_ovf);
> +   overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
>   }
> 
> -   gcc_assert (overflow == 0);
> +   gcc_assert (!overflow);
> 
> return build_int_cst_wide (rtype, low, high);
>   }

This cannot build because of the references to low and high in the last line.

As Richard said, building a cross cc1 is very easy.

-- 
Eric Botcazou


Re: Loop stride optimization hint

2012-09-12 Thread Jan Hubicka
> > 
> >   loop iterations:(op0[ref offset: 192] changed || op0[ref offset: 256] 
> > changed || op0[ref offset: 224] changed) && (op0[ref offset: 96] changed || 
> > op0[ref offset: 160] changed || op0[ref offset: 128] changed)

Looking at the testcase, loop stride is (op0[ref offset: 384] changed) && 
(op0[ref offset: 192] changed)

that is more trackable ;)  The problem with iterations is simply that they
verify that ubound,hbound and stride is known.

Also the individual loops are combined by and_predicates.  We hint when one of
loops become to have known stride or iterations. We lose track then and do not
hint for further loops on subsequent inlining.  I am not sure how important it
is - for fortran I guess the first inlining matter the most.

Honza


Re: [SH] Correct address cost estimations

2012-09-12 Thread Kaz Kojima
Oleg Endo  wrote:
> This corrects the address cost estimations for SH.
> Tested on rev 191161 with 
> make -k check RUNTESTFLAGS="--target_board=sh-sim
> \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"
> 
> and no new failures.
> With this applied CSiBE shows a total size decrease of 4668 bytes for
> '-O2 -m4-single -ml -mpretend-cmove'.
> 
> OK?

OK.

Regards,
kaz


Re: [patch] Fix PR rtl-optimization/54290

2012-09-12 Thread Eric Botcazou
> Bootstrapped/regtested on x86_64-suse-linux.  Does that look plausible?  Do
> we want to fix this on release branches as well?
> 
> 
> 2012-09-02  Eric Botcazou  
> 
>   PR rtl-optimization/54290
>   * reload1.c (choose_reload_regs): Also take into account secondary MEMs
>   to remove address replacements for inherited reloads.

Ulrich, would you mind taking a look when you have some time?  TIA.
  http://gcc.gnu.org/ml/gcc-patches/2012-09/msg00037.html

-- 
Eric Botcazou


Re: Scheduler: Allow breaking dependencies by modifying patterns

2012-09-12 Thread Maxim Kuvyrkov
On 12/09/2012, at 4:34 AM, Vladimir Makarov wrote:

> On 08/03/2012 08:05 AM, Bernd Schmidt wrote:
>> 
...
> Ok, thanks.  The changes are pretty straightforward.  Only just a few 
> comments.
> 
...
> 
> Thanks for the patch, Bernd.  Sorry for the delay with the review.  I thought 
> that Maxim writes his comments first.

I reviewed the patch in http://gcc.gnu.org/ml/gcc-patches/2012-08/msg01028.html 
(comments were similar to yours), but didn't CC you.  Anyway, thanks for the 
review!

--
Maxim Kuvyrkov
CodeSourcery / Mentor Graphics



[google/gcc-4_7] Fix problem with asm spec for -gsplit-dwarf

2012-09-12 Thread Cary Coutant
2012-09-12   Cary Coutant  

gcc/
* gcc.c (replace_extension_spec_func): Restrict search for
extension to last component of path.

Index: gcc/gcc.c
===
--- gcc/gcc.c   (revision 191233)
+++ gcc/gcc.c   (working copy)
@@ -8413,12 +8413,18 @@ replace_extension_spec_func (int argc, c
   char *name;
   char *p;
   char *result;
+  int i;
 
   if (argc != 2)
 fatal_error ("too few arguments to %%:replace-extension");
 
   name = xstrdup (argv[0]);
-  p = strrchr (name, '.');
+
+  for (i = strlen(name) - 1; i >= 0; i--)
+if (IS_DIR_SEPARATOR (name[i]))
+  break;
+
+  p = strrchr (name + i + 1, '.');
   if (p != NULL)
   *p = '\0';
 


Re: [google/gcc-4_7] Fix problem with asm spec for -gsplit-dwarf

2012-09-12 Thread Sterling Augustine
On Wed, Sep 12, 2012 at 4:18 PM, Cary Coutant  wrote:
> 2012-09-12   Cary Coutant  
>
> gcc/
> * gcc.c (replace_extension_spec_func): Restrict search for
> extension to last component of path.
>
> Index: gcc/gcc.c
> ===
> --- gcc/gcc.c   (revision 191233)
> +++ gcc/gcc.c   (working copy)
> @@ -8413,12 +8413,18 @@ replace_extension_spec_func (int argc, c
>char *name;
>char *p;
>char *result;
> +  int i;
>
>if (argc != 2)
>  fatal_error ("too few arguments to %%:replace-extension");
>
>name = xstrdup (argv[0]);
> -  p = strrchr (name, '.');
> +
> +  for (i = strlen(name) - 1; i >= 0; i--)
> +if (IS_DIR_SEPARATOR (name[i]))
> +  break;
> +
> +  p = strrchr (name + i + 1, '.');
>if (p != NULL)
>*p = '\0';
>

This is OK for Google 4.7.

Sterling


Re: [google/gcc-4_7] Fix problem with asm spec for -gsplit-dwarf

2012-09-12 Thread Cary Coutant
>> 2012-09-12   Cary Coutant  
>>
>> gcc/
>> * gcc.c (replace_extension_spec_func): Restrict search for
>> extension to last component of path.
>
> This is OK for Google 4.7.

Thanks, committed as r191234.

-cary


Re: [C PATCH] Avoid ICEs due to save_expr instead of c_save_expr (PR c/54428)

2012-09-12 Thread H.J. Lu
On Fri, Aug 31, 2012 at 5:36 AM, Jakub Jelinek  wrote:
> Hi!
>
> This is another case of the issue that save_expr shouldn't be called
> when parsing C (except for in_late_binary_op), but c_save_expr must be
> called instead.
>
> I wonder if we shouldn't add a langhook, which would do
> if (in_late_binary_op) save_expr_1 else c_save_expr
> and ensure that when not parsing in_late_binary_op is set or something
> similar, and if non-NULL, call the langhook from save_expr.
>
> Anyway, this patch fixes this issue even without such changes,
> bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/4.7?
>
> 2012-08-31  Jakub Jelinek  
>
> PR c/54428
> * c-convert.c (convert): Don't call fold_convert_loc if
> TYPE_MAIN_VARIANT of a COMPLEX_TYPE is the same, unless e
> is a COMPLEX_EXPR.  Remove TYPE_MAIN_VARIANT check from
> COMPLEX_TYPE -> COMPLEX_TYPE conversion.
>
> * gcc.c-torture/compile/pr54428.c: New test.
>

This caused:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54559

-- 
H.J.


Re: Backtrace library [1/3]

2012-09-12 Thread Ian Lance Taylor
On Wed, Sep 12, 2012 at 2:04 PM, Joseph S. Myers
 wrote:
> On Wed, 12 Sep 2012, Ian Lance Taylor wrote:
>
>> On Wed, Sep 12, 2012 at 9:23 AM, Joseph S. Myers
>>  wrote:
>> > On Tue, 11 Sep 2012, Ian Lance Taylor wrote:
>> >
>> >> The configury is fairly standard.  Note that libbacktrace is built as
>> >> both a host library (to link into the compilers) and as a target library
>> >> (to link into libgo and possibly other libraries).
>> >
>> > Under what circumstances will the library be built for the target - only
>> > if a relevant language such as Go is being built, or unconditionally?
>>
>> My intent is to only build it when something needs it, e.g., libgo.  I
>> don't know if I've expressed that intent correctly.
>
> I think that if a library is listed in target_libs in config-lang.in for
> at least one language, and if all languages with it so listed are
> disabled, then that library will be disabled - but if no languages list it
> in config-lang.in, it will be enabled by default.

Looks right.  If libbacktrace is approved and committed, I will
included this patch to gcc/go/config-lang.in.  I tested that it indeed
causes libbacktrace to be built only for the host, not for the target,
when not building Go.

Ian

Index: gcc/go/config-lang.in
===
--- gcc/go/config-lang.in   (revision 191171)
+++ gcc/go/config-lang.in   (working copy)
@@ -28,7 +28,7 @@ language="go"

 compilers="go1\$(exeext)"

-target_libs="target-libgo target-libffi"
+target_libs="target-libgo target-libffi target-libbacktrace"

 # The Go frontend is written in C++, so we need to build the C++
 # compiler during stage 1.


Re: [PATCH] Fix PR gcov-profile/54487 (profiledbootstrap intermittent failures) (issue6496113)

2012-09-12 Thread Teresa Johnson
On Wed, Sep 12, 2012 at 2:12 PM, Teresa Johnson  wrote:
> On Wed, Sep 12, 2012 at 1:54 PM, Jakub Jelinek  wrote:
>> On Wed, Sep 12, 2012 at 01:45:12PM -0700, Teresa Johnson wrote:
>>> This fixes PR gcov-profile/54487 where the gcda files were not locked
>>> by the profile-use read, enabling writes by other instrumented compiles
>>> to change the profile in the middle of the profile use read. The GCOV_LOCKED
>>> macro was not set because it was guarded by HOST_HAS_F_SETLKW, which was
>>> never set. The fix is to add a compile test in the configure to set it.
>>>
>>> Tested with bootstrap and profiledbootstrap on x86_64-unknown-linux-gnu.
>>> Ok for trunk?
>>>
>>> Thanks,
>>> Teresa
>>>
>>> 2012-09-12  Teresa Johnson  
>>>
>>
>> Please include
>> PR gcov-profile/54487
>> here in the ChangeLog entry.
>>
>>>   * configure.ac(HOST_HAS_F_SETLKW): Set based on compile
>>
>> Space before (.
>>
>>>   test using F_SETLKW with fcntl.
>>>   * configure, config.in: Regenerate.
>>>
>>> --- configure.ac  (revision 191225)
>>> +++ configure.ac  (working copy)
>>> @@ -1159,6 +1159,16 @@ if test $gcc_cv_type_clock_t = yes; then
>>>[Define if  defines clock_t.])
>>>  fi
>>>
>>> +# Check if F_SETLKW is supported by fcntl.
>>> +AC_CACHE_CHECK(for F_SETLKW, ac_cv_f_setlkw, [
>>> +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
>>> +#include "fcntl.h"
>>
>> Please use
>> #include 
>> instead.
>>
>>> +]], [[struct flock fl; fl.l_whence = 0; fl.l_start = 0; fl.l_len = 0; 
>>> fl.l_pid = 0; return fcntl (1, F_SETLKW, 
>>> &fl);]])],[ac_cv_f_setlkw=yes],[ac_cv_f_setlkw=no])])
>>
>> And split this overlong line, there is no reason why you can't use a newline
>> e.g. after every ; in the test proglet.
>>
>>> +if test $ac_cv_f_setlkw = yes; then
>>> +  AC_DEFINE(HOST_HAS_F_SETLKW, 1,
>>> +  [Define if F_SETLKW supported by fcntl.])
>>> +fi
>>> +
>>>  # Restore CFLAGS, CXXFLAGS from before the gcc_AC_NEED_DECLARATIONS tests.
>>>  CFLAGS="$saved_CFLAGS"
>>>  CXXFLAGS="$saved_CXXFLAGS"
>>
>> Ok for trunk with those changes, IMHO it would be worthwhile to put this
>> into 4.7 too, I've seen several unexplained profiledbootstrap errors on
>> that branch in the past already when using make -jN with high N.
>
> Ok, thanks. I will fix the issues you pointed out above and retest
> before committing. I'll prepare a backport patch for 4.7 as well.

Patch committed. I have the 4_7 backport ready. It is basically the
same patch. Also tested with bootstap and profiledbootstrap. Ok for
gcc/4_7?

Thanks,
Teresa

2012-09-12  Teresa Johnson  

Backport from mainline.
2012-09-12  Teresa Johnson  

PR gcov-profile/54487
* configure.ac (HOST_HAS_F_SETLKW): Set based on compile
test using F_SETLKW with fcntl.
* configure, config.in: Regenerate.

Index: configure
===
--- configure   (revision 191237)
+++ configure   (working copy)
@@ -10968,6 +10968,46 @@ $as_echo "#define HAVE_CLOCK_T 1" >>confdefs.h

 fi

+# Check if F_SETLKW is supported by fcntl.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for F_SETLKW" >&5
+$as_echo_n "checking for F_SETLKW... " >&6; }
+if test "${ac_cv_f_setlkw+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#include 
+int
+main ()
+{
+
+struct flock fl;
+fl.l_whence = 0;
+fl.l_start = 0;
+fl.l_len = 0;
+fl.l_pid = 0;
+return fcntl (1, F_SETLKW, &fl);
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_cv_f_setlkw=yes
+else
+  ac_cv_f_setlkw=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_f_setlkw" >&5
+$as_echo "$ac_cv_f_setlkw" >&6; }
+if test $ac_cv_f_setlkw = yes; then
+
+$as_echo "#define HOST_HAS_F_SETLKW 1" >>confdefs.h
+
+fi
+
 # Restore CFLAGS, CXXFLAGS from before the gcc_AC_NEED_DECLARATIONS tests.
 CFLAGS="$saved_CFLAGS"
 CXXFLAGS="$saved_CXXFLAGS"
@@ -17970,7 +18010,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 17973 "configure"
+#line 18013 "configure"
 #include "confdefs.h"

 #if HAVE_DLFCN_H
@@ -18076,7 +18116,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 18079 "configure"
+#line 18119 "configure"
 #include "confdefs.h"

 #if HAVE_DLFCN_H
Index: config.in
===
--- config.in   (revision 191237)
+++ config.in   (working copy)
@@ -1588,6 +1588,12 @@
 #endif


+/* Define if F_SETLKW supported by fcntl. */
+#ifndef USED_FOR_TARGET
+#undef HOST_HAS_F_SETLKW
+#endif
+
+
 /* Define as const if the declaration of iconv() needs const. */
 #ifndef USED_FOR_TARGET
 #undef ICONV_CONST
Index: configure.ac
===

Re: [PATCH] Fix PR gcov-profile/54487 (profiledbootstrap intermittent failures) (issue6496113)

2012-09-12 Thread Jakub Jelinek
On Wed, Sep 12, 2012 at 10:06:02PM -0700, Teresa Johnson wrote:
> Patch committed. I have the 4_7 backport ready. It is basically the
> same patch. Also tested with bootstap and profiledbootstrap. Ok for
> gcc/4_7?

Yes, thanks.

> 2012-09-12  Teresa Johnson  
> 
> Backport from mainline.
> 2012-09-12  Teresa Johnson  
> 
> PR gcov-profile/54487
> * configure.ac (HOST_HAS_F_SETLKW): Set based on compile
> test using F_SETLKW with fcntl.
> * configure, config.in: Regenerate.

Jakub