[PATCH, SMS] Fix violation of memory dependence

2011-06-13 Thread Revital Eres
Hello,

The attached patch fixes violation of memory dependencies. The
problematic scenario happens when -fmodulo-sched-allow-regmoves flag
is set and certain anti-dep edges are not created.

For example, consider the following three instructions and the edges
between them.  When -fmodulo-sched-allow-regmoves is set the edge (63 -
Anti, 0 -> 64) is not created. (probably due to transitivity)

Insn 63)  r168 = MEM[176]
Out edges: (63 - Anti, 0 -> 64)
In edges: (64 - True, 1 -> 63), (68 - True, 1 -> 63)

insn 64)  176 = 176 + 4
Out edges: (64 - True, 1 -> 63), (64 - True, 0-> 68)
In edges: (63 - Anti, 0 -> 64)

insn 68)  MEM[176 – 4] =  193
Out edges: (68 - True, 1 -> 63)
In edges: (64 - True, 0-> 68)

This anti-dep edge is on the path from one memory instruction to another
--- from 63 to 68; such that removing the edge caused a violation of
the memory dependencies as insn 63 was scheduled after insn 68.

This patch adds intra edges between every two memory instructions in
this case.  It fixes recent bootstrap failure on ARM. (with SMS flags)

The patch was tested as follows:
On ppc64-redhat-linux regtest as well as bootstrap with SMS flags
enabling SMS also on loops with stage count 1.  Regtested on SPU.
On arm-linux-gnueabi bootstrap c language with SMS
flags enabling SMS also on loops with stage count 1
and currently regression testing on c,c++.

OK for mainline once regtest on arm-linux-gnueabi completes?

Thanks,
Revital

Changelog:

gcc/
* ddg.c (add_intra_loop_mem_dep): New function.
(build_intra_loop_deps): Call it.

testsuite/
* gcc.dg/sms-9.c: New file.
Index: ddg.c
===
--- ddg.c   (revision 174906)
+++ ddg.c   (working copy)
@@ -390,6 +390,36 @@ insns_may_alias_p (rtx insn1, rtx insn2)
 &PATTERN (insn2));
 }
 
+/* Given two nodes, analyze their RTL insns and add intra-loop mem deps
+   to ddg G.  */
+static void
+add_intra_loop_mem_dep (ddg_ptr g, ddg_node_ptr from, ddg_node_ptr to)
+{
+
+  if (!insns_may_alias_p (from->insn, to->insn))
+/* Do not create edge if memory references have disjoint alias sets.  */
+return;
+
+  if (mem_write_insn_p (from->insn))
+{
+  if (mem_read_insn_p (to->insn))
+   create_ddg_dep_no_link (g, from, to,
+   DEBUG_INSN_P (to->insn)
+   ? ANTI_DEP : TRUE_DEP, MEM_DEP, 0);
+  else if (from->cuid != to->cuid)
+   create_ddg_dep_no_link (g, from, to,
+   DEBUG_INSN_P (to->insn)
+   ? ANTI_DEP : OUTPUT_DEP, MEM_DEP, 0);
+}
+  else
+{
+  if (mem_read_insn_p (to->insn))
+   return;
+  else if (from->cuid != to->cuid)
+   create_ddg_dep_no_link (g, from, to, ANTI_DEP, MEM_DEP, 0);
+}
+}
+
 /* Given two nodes, analyze their RTL insns and add inter-loop mem deps
to ddg G.  */
 static void
@@ -477,10 +507,22 @@ build_intra_loop_deps (ddg_ptr g)
  if (DEBUG_INSN_P (j_node->insn))
continue;
  if (mem_access_insn_p (j_node->insn))
-   /* Don't bother calculating inter-loop dep if an intra-loop dep
-  already exists.  */
+   {
+ /* Don't bother calculating inter-loop dep if an intra-loop 
dep
+already exists.  */
  if (! TEST_BIT (dest_node->successors, j))
add_inter_loop_mem_dep (g, dest_node, j_node);
+ /* If -fmodulo-sched-allow-regmoves
+is set certain anti-dep edges are not created.
+It might be that these anti-dep edges are on the
+path from one memory instruction to another such that
+removing these edges could cause a violation of the
+memory dependencies.  Thus we add intra edges between
+every two memory instructions in this case.  */
+ if (flag_modulo_sched_allow_regmoves
+ && !TEST_BIT (dest_node->predecessors, j))
+   add_intra_loop_mem_dep (g, j_node, dest_node);
+   }
 }
 }
 }
Index: testsuite/gcc.dg/sms-9.c
===
--- testsuite/gcc.dg/sms-9.c(revision 0)
+++ testsuite/gcc.dg/sms-9.c(revision 0)
@@ -0,0 +1,60 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fmodulo-sched -fno-auto-inc-dec -O2 
-fmodulo-sched-allow-regmoves" } */
+
+#include 
+#include 
+
+struct df_ref_info
+{
+  unsigned int *begin;
+  unsigned int *count;
+};
+
+extern void *memset (void *s, int c, __SIZE_TYPE__ n);
+
+
+__attribute__ ((noinline))
+int
+df_reorganize_refs_by_reg_by_insn (struct df_ref_info *ref_info,
+  int num, unsigned int start)
+{
+  unsigned int m = num;
+  unsigned int offset = 77;
+  unsigned int r;
+
+  for (r = start; r 

Re: [patch, testsuite] Fix vectorizer testsuite failures on ARM

2011-06-13 Thread Ira Rosen
On 9 June 2011 13:00, Ira Rosen  wrote:
> Hi,
>
> This patch fixes several vectorizer testsuite failures on ARM:
> - vect-16.c checks that the vectorization fails without -ffast-math,
> but -ffast-math is a default flag for vector tests on ARM. I renamed
> the test to no-fast-math-vect-16.c to avoid the use of the flag for
> it.
> - vect-peel-3.c and vect-peel-4.c test vectorizer's peeling heuristics
> ignoring double-word vectors. This patch changes the misalignment
> values of the data-refs.
> - bb-slp-10.c contains a misaligned store supported not only on
> vect_hw_misalign targets, but also on vect_element_align targets. The
> patch fixes that.
>
> Tested on arm-linux-gnueabi, powerpc64-suse-linux and x86_64-suse-linux.
> OK for mainline?
>

I'll commit this patch tomorrow if nobody objects.

Thanks,
Ira

> Thanks,
> Ira
>
> testsuite/ChangeLog:
>
>       * gcc.dg/vect/vect-16.c: Rename to ...
>       * gcc.dg/vect/no-fast-math-vect16.c: ... this.
>       * gcc.dg/vect/vect-peel-3.c: Adjust misalignment values
>       for double-word vectors.
>       * gcc.dg/vect/vect-peel-4.c: Likewise.
>       * gcc.dg/vect/bb-slp-10.c: Replace vect_hw_misalign with
>       vect_element_align.
>       * gcc.dg/vect/vect.exp: Run no-fast-math-* tests with
>       -fno-fast-math.
>


Fwd: Have a boehm-gc patch for gcj/rtems

2011-06-13 Thread Andrew Haley
On 12/06/11 16:45, Jie Liu wrote:
> Hi,
> 
> Java HelloWorld compiled by cross gcj for RTEMS can run without
> problem now, I think it's time for get the boehm-gc patch reviewed and
> merged.  :)  Here is the patch:
> 
> Index: mach_dep.c
> ===
> --- mach_dep.c  (revision 172224)
> +++ mach_dep.c  (working copy)
> @@ -411,7 +411,8 @@
>  word dummy;
> 
>  #   if defined(USE_GENERIC_PUSH_REGS)
> -# ifdef HAVE_BUILTIN_UNWIND_INIT
> +# if defined(HAVE_BUILTIN_UNWIND_INIT)\
> + &&!(defined(I386) && defined(RTEMS))
>  /* This was suggested by Richard Henderson as the way to   */
>  /* force callee-save registers and register windows onto   */
>  /* the stack.  */
> @@ -437,8 +438,8 @@
> for (; (char *)i < lim; i++) {
> *i = 0;
> }
> -#   if defined(MSWIN32) || defined(MSWINCE) \
> -  || defined(UTS4) || defined(LINUX) || defined(EWS4800)
> +#   if defined(MSWIN32) || defined(MSWINCE) || defined(UTS4) \
> +   || defined(LINUX) || defined(EWS4800) || defined(RTEMS)
>   (void) setjmp(regs);
>  #   else
>  (void) _setjmp(regs);
> Index: include/private/gcconfig.h
> ===
> --- include/private/gcconfig.h  (revision 172224)
> +++ include/private/gcconfig.h  (working copy)
> @@ -315,6 +315,11 @@
>  #define mach_type_known
>  #   endif
>  # endif
> +# if defined(__rtems__) && (defined(i386) || defined(__i386__))
> +#   define I386
> +#   define RTEMS
> +#   define mach_type_known
> +# endif
>  # if defined(NeXT) && defined(mc68000)
>  #   define M68K
>  #   define NEXT
> @@ -1297,6 +1302,17 @@
>  #  define STACKBOTTOM ((ptr_t)0xc000)
>  #  define DATAEND  /* not needed */
>  #   endif
> +#   ifdef RTEMS
> +#   define OS_TYPE "RTEMS"
> +#   include 
> +extern int etext[];
> +extern int end[];
> +void *rtems_get_stack_bottom();
> +#   define InitStackBottom rtems_get_stack_bottom()
> +#   define DATASTART ((ptr_t)etext)
> +#   define DATAEND ((ptr_t)end)
> +#   define STACKBOTTOM ((ptr_t)InitStackBottom)
> +#   endif
>  #   ifdef DOS4GW
>  # define OS_TYPE "DOS4GW"
>extern long __nullarea;
> @@ -2370,7 +2386,8 @@
>  #   else
>  # if defined(NEXT) || defined(DOS4GW) || \
>  (defined(AMIGA) && !defined(GC_AMIGA_FASTALLOC)) || \
> -(defined(SUNOS5) && !defined(USE_MMAP))
> +(defined(SUNOS5) && !defined(USE_MMAP)) || \
> + defined(RTEMS)
>  #   define GET_MEM(bytes) HBLKPTR((size_t) \
>   calloc(1, (size_t)bytes
> + GC_page_size) \
>   + GC_page_size-1)
> Index: configure
> ===
> --- configure   (revision 172224)
> +++ configure   (working copy)
> @@ -14867,6 +14867,8 @@
>  $as_echo "#define _REENTRANT 1" >>confdefs.h
> 
>  ;;
> + rtems)
> +;;
>   decosf1 | irix | mach | os2 | dce | vxworks)
>  as_fn_error "thread package $THREADS not yet supported" "$LINENO" 5
>  ;;
> Index: os_dep.c
> ===
> --- os_dep.c(revision 172224)
> +++ os_dep.c(working copy)
> @@ -1507,7 +1507,7 @@
> 
>  # if !defined(OS2) && !defined(PCR) && !defined(AMIGA) \
> && !defined(MSWIN32) && !defined(MSWINCE) \
> -   && !defined(MACOS) && !defined(DOS4GW)
> +   && !defined(MACOS) && !defined(DOS4GW) && !defined(RTEMS)
> 
>  # ifdef SUNOS4
>  extern caddr_t sbrk();
> 
> Best Regards,
> Jie
> 
> 2011/6/4 Jie Liu :
>> Hi,
>>
>> I am working on porting gcj to rtems now, it's a project of GSoC2011.[1]
>> And now, the first step: boehm-gc have been ported,  so I want to get
>> this patch reviewed and merged.
>>
>> And I have filed the FSF Paperwork, the patch has been attached.
>> Thank you for your time.
>>
>> Best Regards,
>> Jie
>>
>> [1]http://socghop.appspot.com/gsoc/org/google/gsoc2011/rtems
>>



Cgraph alias reorg 19/14 (inline functions called once through alias)

2011-06-13 Thread Jan Hubicka
Hi,
tramp3d and some other benchmarks regressed as a result of switch to new
represnetaiton of same body aliases.  The reason is that inliner doesn't handle
functions with aliases so well as it used to.  The problems are:
1) functions with aliases are not inlined when called once
2) we now have more same comdat groups and inliner never removed offline copy 
of such functions
3) early inlinining is weaker since topological sort ignore aliases.

This patch fixes 1)

Bootstrapped/regtested x86_64-linux, comitted.

Honza

* cgraph.c (cgraph_for_node_thunks_and_aliases,
cgraph_for_node_and_aliases): Fix thinko in recursive walking.
(nonremovable_p): New function.
(cgraph_can_remove_if_no_direct_calls_p): New function.
(used_from_object_file_p): New functoin.
(cgraph_will_be_removed_from_program_if_no_direct_calls): Look for 
references
from aliases.
* cgraph.h (cgraph_can_remove_if_no_direct_calls_p): Bring offline.
* ipa-inline.c (check_caller_edge): New function.
(want_inline_function_called_once_p): Use it; accept aliases called 
once, too.
* ipa-inline-analysis.c (do_estimate_growth): Remove FIXME.

Index: cgraph.c
===
*** cgraph.c(revision 174958)
--- cgraph.c(working copy)
*** cgraph_for_node_thunks_and_aliases (stru
*** 2567,2580 
  if (e->caller->thunk.thunk_p
&& (include_overwritable
|| cgraph_function_body_availability (e->caller)))
!   cgraph_for_node_thunks_and_aliases (e->caller, callback, data, 
include_overwritable);
for (i = 0; ipa_ref_list_refering_iterate (&node->ref_list, i, ref); i++)
  if (ref->use == IPA_REF_ALIAS)
{
struct cgraph_node *alias = ipa_ref_refering_node (ref);
if (include_overwritable
|| cgraph_function_body_availability (alias) > AVAIL_OVERWRITABLE)
!   cgraph_for_node_thunks_and_aliases (alias, callback, data, 
include_overwritable);
}
return false;
  }
--- 2567,2584 
  if (e->caller->thunk.thunk_p
&& (include_overwritable
|| cgraph_function_body_availability (e->caller)))
!   if (cgraph_for_node_thunks_and_aliases (e->caller, callback, data,
! include_overwritable))
!   return true;
for (i = 0; ipa_ref_list_refering_iterate (&node->ref_list, i, ref); i++)
  if (ref->use == IPA_REF_ALIAS)
{
struct cgraph_node *alias = ipa_ref_refering_node (ref);
if (include_overwritable
|| cgraph_function_body_availability (alias) > AVAIL_OVERWRITABLE)
! if (cgraph_for_node_thunks_and_aliases (alias, callback, data,
! include_overwritable))
!   return true;
}
return false;
  }
*** cgraph_for_node_and_aliases (struct cgra
*** 2600,2606 
struct cgraph_node *alias = ipa_ref_refering_node (ref);
if (include_overwritable
|| cgraph_function_body_availability (alias) > AVAIL_OVERWRITABLE)
!   cgraph_for_node_and_aliases (alias, callback, data, 
include_overwritable);
}
return false;
  }
--- 2604,2612 
struct cgraph_node *alias = ipa_ref_refering_node (ref);
if (include_overwritable
|| cgraph_function_body_availability (alias) > AVAIL_OVERWRITABLE)
!   if (cgraph_for_node_and_aliases (alias, callback, data,
!  include_overwritable))
!   return true;
}
return false;
  }
*** cgraph_can_remove_if_no_direct_calls_and
*** 2900,2905 
--- 2906,2941 
return true;
  }
  
+ /* Worker for cgraph_can_remove_if_no_direct_calls_p.  */
+ 
+ static bool
+ nonremovable_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
+ {
+   return !cgraph_can_remove_if_no_direct_calls_and_refs_p (node);
+ }
+ 
+ /* Return true when function NODE and its aliases can be removed from 
callgraph
+if all direct calls are eliminated.  */
+ 
+ bool
+ cgraph_can_remove_if_no_direct_calls_p (struct cgraph_node *node)
+ {
+   /* Extern inlines can always go, we will use the external definition.  */
+   if (DECL_EXTERNAL (node->decl))
+ return true;
+   if (node->address_taken)
+ return false;
+   return !cgraph_for_node_and_aliases (node, nonremovable_p, NULL, true);
+ }
+ 
+ /* Worker for cgraph_can_remove_if_no_direct_calls_p.  */
+ 
+ static bool
+ used_from_object_file_p (struct cgraph_node *node, void *data 
ATTRIBUTE_UNUSED)
+ {
+   return cgraph_used_from_object_file_p (node);
+ }
+ 
  /* Return true when function NODE can be expected to be removed
 from program when direct calls in this compilation unit are removed.
  
*** bool
*** 2918,2924 
  cgraph_will_be_removed_from_program_if_no_direct_calls (struct cgraph_node 
*node)
  {
gcc_assert (!no

RE: [Patch : H8300] Bug fix for bit insn and minor tweaks to insns

2011-06-13 Thread Kaushik Phatak
Hi Jeff,
Thanks for the quick review.
 
>> the right test is rtx_equal_p(operands[0], operands[1])
Committed with above changes to the bsetqi_msx, bclrqi_msx and bnotqi_msx 
patterns.

Thanks & Regards,
Kaushik Phatak
www.kpitgnutools.com

-Original Message-
From: Jeff Law [mailto:l...@redhat.com] 
Sent: 11 June 2011 00:01
To: Kaushik Phatak
Cc: gcc-patches@gcc.gnu.org; Prafulla Thakare
Subject: Re: [Patch : H8300] Bug fix for bit insn and minor tweaks to insns

-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

On 06/09/11 01:20, Kaushik Phatak wrote:
> Hi,
> The following patch fixes an ICE that is generated when the compiler tries
> to perform bit manipulation for logical operations when the source and 
> destination address does not match. The testcase is also included in the 
> patch(gcc.dg).
> The additional condition in the insn takes care of the ICE which occurs at 
> '-O1'.
> The other insn's are reordered to give preference to bit instructions using 
> existing
> constraints.
> Ok to apply?
> 
> Thanks & Regards,
> Kaushik Phatak
> www.kpitgnutools.com
> 
> 2011-06-09  Kaushik Phatak 
> 
>   * config/h8300/h8300.md (bsetqi_msx, bclrqi_msx, bnotqi_msx): Added 
>   condition to disallow non-identical memory locations.
>   (*andqi3_2, andqi3_1, iorqi3_1, xorqi3_1): Reorder insn to give
>   preference to bit manipulation instructions.
>   * gcc.dg/h8300-bit-insn-ice2.2: New testcase.
Can't the operand be MEM (reg) or MEM (const_int)?  In which case
INTVAL (XEXP (operands[], 0)) is the wrong test since you shouldn't be
applying INTVAL to a REG.  Furthermore, if you're trying to compare
CONST_INTs, those are shared and you can use pointer equality rather
than their underlying value.

Regardless, I think the right test is
rtx_equal_p (operands[0], operands[1], NULL)

With that change to bsetqi_msx, bclrqi_msx and bnotqi_msx this patch is OK.

jeff
-BEGIN PGP SIGNATURE-
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJN8mMBAAoJEBRtltQi2kC7NjUIAJDylrPPoI6YMk4lfRwclIPT
VCzk31q3DpZ3H5CEv8e6u7DmmJl9ng78uYHXejAqTbpAn/mDOtEWasBIUIhTACFF
UgH3bK3wwhF412Dbr/6ND/dPCMiImzSUR8PC8N6S31k8q5JmXIkfVfX/oKaSH/n6
9VxyIfh7PmGRqjMlH434DyvOvH+qPfN3jSNLVHDvUeVYKI6gnfXtR8tuHTlIw4LO
eiubgvmD4RXa6Nnpeuafs8GyTCRA5jbVU2jf3ZR3jBA08YpS/cSeECd19ytSdVuS
Au20hmDPStkJwr/0kw+QtqeZRg5BP4JWF0COYQJDFQVa64Gt6LEterBmntPJr8g=
=JdgV
-END PGP SIGNATURE-




Re: Is VIEW_CONVERT_EXPR an lvalue? (was Re: RFA (fold): PATCH for c++/49290 (folding *(T*)(ar+10)))

2011-06-13 Thread Richard Guenther
On Mon, Jun 13, 2011 at 12:10 AM, Jason Merrill  wrote:
> On 06/12/2011 06:59 AM, Richard Guenther wrote:
>>
>> The please provide a specification on what a VIEW_CONVERT_EXPR does
>> to type-based alias analysis.
>
> If the alias set of the VIEW_CONVERT_EXPR type the same as the set for the
> operand, ignore it; if it's a subset, handle it like a COMPONENT_REF;
> otherwise ignore the operand for TBAA.

Handling it as a component-ref would mean adding subsets to the
view-convert-expr
base.

> It seems like get_alias_set currently gets this backwards; it's ignoring
> outer COMPONENT_REFs instead of the inner structure.

Right, it treats it as if it were an rvalue conversion around an inner lvalue -
everything TBAA related happens on the lvalue, so if you load a
view-convert-expr(float_var) the alias set is that of the float.

>> Yes, we do handle lvalue VIEW_CONVERT_EXPRs, but that is for Ada
>> which uses it for aggregates.
>
> It also seems to be widely used for vectors, but perhaps that's only for
> rvalues.

Yes.

>> I don't want us to add more lvalue
>> VIEW_CONVERT_EXPR cases, especially not for register types.
>
> Then how do we convert an int lvalue to a volatile int lvalue?

By doing *(volatile int *)&int_var.  Alternatively you can use a MEM_REF
(which sofar isn't created by frontends but only during gimplification) of
the form MEM[&int_var, (int *)0] with TREE_THIS_VOLATILE set and
a type of volatile int.  This is btw. what you can do for your array-ref
case as well - just you wouldn't have an array-ref then, but a MEM_REF
with a non-zero (but constant) offset (which might be a limitation to you).

But I suppose you want the array-ref be folded to a constant eventually?
Or why are you not happy with the *(T *)&foo tree?

>> /* Represents viewing something of one type as being of a second type.
>>   This corresponds to an "Unchecked Conversion" in Ada and roughly to
>>   the idiom *(type2 *)&X in C.
>
> Right, that's why I thought it was an lvalue.

"roughly" ;)  It's equally roughly (type2)X in C, but that doesn't "work"
for aggregate types type2.  So we don't have a 1:1 C equivalent of
what VIEW_CONVERT_EXPR does.

>>   This code may also be used within the LHS of a MODIFY_EXPR
>
> And this.

And the following weasel-wording about "no data motion" for this
case.

Now, the middle-end shouldn't have an issue dealing with lvalue
VIEW_CONVERT_EXPRs, as MEM_REFs are exposing all
issues VIEW_CONVERT_EXPR lvalues would do.  There is
at least the folding to NOP_EXPR and eventually other frontend
specific code that might be confused though.  And of course
there is the TBAA issue, so you have to be careful to not
create wrong code - for example folding *(float *)&int_var[5]
to VIEW_CONVERT_EXPR(int_var[5]) is wrong.

Richard.

> Jason
>


Re: RFA (fold): PATCH for c++/49290 (folding *(T*)(ar+10))

2011-06-13 Thread Richard Guenther
On Mon, Jun 13, 2011 at 5:17 AM, Mike Stump  wrote:
> On Jun 12, 2011, at 3:55 AM, Richard Guenther wrote:
>> In almost all cases(*) the need for a lvalue VIEW_CONVERT_EXPR can be avoided
>> by moving the VIEW_CONVERT_EXPR to the rvalue assigned too it.  Remember that
>> VIEW_CONVERT_EXPR always conver the full object and are not allowed to
>> change sizes.
>>
>> So, do you have an example?
>
> Sure, take a divmod type of instruction.[1]  It has two outputs, a div and a 
> mod.  The instruction operates on registers, and produces two completely 
> independent values.  But really, it is a general features of the built-in 
> mechanism that Kenny and I are working on that some people would like to 
> reuse for other ports.  The general feature is that one can declare any 
> argument to a built-in to be input only, output only or input/output.  This 
> nicely matches what I think quite of lot of machines do for assembly language 
> semantics.  The support isn't to match my machine, but rather to support 
> building a port, in which 1 or more instructions have such parameters.  
> Requiring memory is a non-starter, and in fact, we already have a scheme for 
> memory_operand type things for the instructions that take memory.  The scheme 
> used for them is borrowed from C++, where we just declare that the built-in 
> takes a reference type.  This reuses most of the code paths from C++ and it 
> seems to work nicely.  I'd be tempting to use it for register references, 
> but, in my current scheme for registers, I support data flow in, out and 
> in/out at the tree level and at the rtl level.  We believe this is nice from 
> an optimizing perspective, and probably required to get the warnings about 
> using uninitialized variables correct.

That's not exactly an example - I can't think of how you want or need
to use VIEW_CONVERT_EXPRs to implement said divmod instruction or why
you would need
anything special for the _argument_ of said instruction.  An
instruction or call with multiple outputs would simply be something
like

{ div_1, mod_2 } = __builtin_divmod (arg_3);

with two SSA defs. A nice representation for the tree for { div_1,
mod_2 } remains to be found (if it should be a single tree at all, or
possibly
multiple ones).

We already play tricks for sincos for example via

tem_1 = __builtin_cexpi (arg_2);
sin_3 = REALPART_EXPR ;
cos_4 = IMAGPART_EXPR ;

which avoids the two defs by using a single def which is then decomposed.

So, can you elaborate a bit more on what you want to do with special
argument kinds?  Elaborate with an actual example, not words.

Thanks,
Richard.


[patch] Don't insert pattern statements into the code (was Fix PR tree-optimization/49318)

2011-06-13 Thread Ira Rosen
On 10 June 2011 12:14, Richard Guenther  wrote:
> In the end I think we should not generate the pattern stmt during
> pattern matching but only mark the relevant statements with a
> pattern kind.  Say, for each pattern we have a "main" statement
> that has related stmts belonging to the pattern that define uses
> of the "main" statement - mark those to refer to that "main" statement.
> For that "main" statement simply record an enum value, like,
> widening_mult.  Then only at vectorized statement
> generation time actually generate the vectorized form of the
> pattern statement.

I ended up with the following: during pattern detection a new scalar
pattern statement is created but not inserted into the code, it is
only recorded as a related statement of the last statement in the
detected pattern. Every time the last statement is being
analyzed/transformed, we switch to the pattern statement instead. It
is much more difficult just to mark the last stmt with an enum value,
since we have to retrieve the relevant operands every time.

I am not sure if we need to free the pattern stmt at the end.

Bootstrapped and now testing on powerpc64-suse-linux (tested
vectorizer testsuite on powerpc64-suse-linux and x86_64-suse-linux.

What do you think?

Thanks,
Ira

ChangeLog:

 * tree-vect-loop.c (vect_determine_vectorization_factor): Don't
 remove irrelevant pattern statements.  For irrelevant statements
 check if it is the last statement of a detected pattern, use
 corresponding pattern statement instead.
 (destroy_loop_vec_info): No need to remove pattern statements,
 only free stmt_vec_info.
 (vect_transform_loop): For irrelevant statements check if it is
 the last statement of a detected pattern, use corresponding
 pattern statement instead.
 * tree-vect-patterns.c (vect_pattern_recog_1): Don't insert
 pattern statements.  Set basic block for the new statement.
 (vect_pattern_recog): Update documentation.
 * tree-vect-stmts.c (vect_mark_stmts_to_be_vectorized): Scan
 operands of pattern statements.
 (vectorizable_call): Fix printing.  In case of a pattern statement
 use the lhs of the original statement when creating a dummy
 statement to replace the original call.
 (vect_analyze_stmt): For irrelevant statements check if it is
 the last statement of a detected pattern, use corresponding
 pattern statement instead.
 * tree-vect-slp.c (vect_schedule_slp_instance): For pattern
 statements use gsi of the original statement.
Index: tree-vect-loop.c
===
--- tree-vect-loop.c(revision 174982)
+++ tree-vect-loop.c(working copy)
@@ -244,7 +244,7 @@ vect_determine_vectorization_factor (loop_vec_info
   for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
 {
  tree vf_vectype;
- gimple stmt = gsi_stmt (si);
+ gimple stmt = gsi_stmt (si), pattern_stmt;
  stmt_info = vinfo_for_stmt (stmt);
 
  if (vect_print_dump_info (REPORT_DETAILS))
@@ -258,20 +258,26 @@ vect_determine_vectorization_factor (loop_vec_info
  /* Skip stmts which do not need to be vectorized.  */
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
  && !STMT_VINFO_LIVE_P (stmt_info))
-   {
-  if (is_pattern_stmt_p (stmt_info))
+{
+  if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+  && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
+  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
+  || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
 {
-   /* We are not going to vectorize this pattern statement,
-  therefore, remove it.  */
-  gimple_stmt_iterator tmp_gsi = gsi_for_stmt (stmt);
-  STMT_VINFO_RELATED_STMT (stmt_info) = NULL;
-  gsi_remove (&tmp_gsi, true);
-  free_stmt_vec_info (stmt);
+  stmt = pattern_stmt;
+  stmt_info = vinfo_for_stmt (pattern_stmt);
+  if (vect_print_dump_info (REPORT_DETAILS))
+{
+  fprintf (vect_dump, "==> examining pattern statement: ");
+  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
+}
 }
-
- if (vect_print_dump_info (REPORT_DETAILS))
-   fprintf (vect_dump, "skip.");
- continue;
+  else
+   {
+ if (vect_print_dump_info (REPORT_DETAILS))
+   fprintf (vect_dump, "skip.");
+ continue;
+}
}
 
  if (gimple_get_lhs (stmt) == NULL_TREE)
@@ -828,25 +834,17 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, b
 
   if (stmt_info)
 {
-  /* Check if this is a "pattern stmt"

Ping: [testsuite]: Skip tests for targets with int < 32 bits

2011-06-13 Thread Georg-Johann Lay

Ping #1 for:
http://gcc.gnu.org/ml/gcc-patches/2011-06/msg00746.html


Georg-Johann Lay:

This patch fixes testsuite failures because the testcases assume
sizeof(int) >= 4.

* gcc.c-torture/compile/pr49029.c: Add dg-require-effective-target
int32plus
* gcc.c-torture/compile/pr49163.c: Ditto.


Do not stream BINFO_VIRTUALs to ltrans unit

2011-06-13 Thread Jan Hubicka
Hi,
by accident I noticed that BINFO_VIRTUALs streaming is really expensive. It
about doubles amount of IL and types streamed by Mozilla.

One obvious optimization is to not stream into ltrans unit where it is
too late to do any useful devirtualization anyway.
Doing so reduces /tmp usage from 1.7GB to 1.1GB and proportionaly reduces
streaming out time.

Bootstrapped/regtested x86_64-linux.
OK?
Honza
* lto-streamer-out.c (lto_output_ts_binfo_tree_pointers): Do not
stream BINFO_VIRTUALS to ltrans units.
Index: lto-streamer-out.c
===
--- lto-streamer-out.c  (revision 174985)
+++ lto-streamer-out.c  (working copy)
@@ -1117,7 +1117,11 @@
 
   lto_output_tree_or_ref (ob, BINFO_OFFSET (expr), ref_p);
   lto_output_tree_or_ref (ob, BINFO_VTABLE (expr), ref_p);
-  lto_output_tree_or_ref (ob, BINFO_VIRTUALS (expr), ref_p);
+  /* BINFO_VIRTUALS is used to drive type based devirtualizatoin.  It often 
links
+ together large portions of programs making it harder to partition.  
Becuase
+ devirtualization is interesting before inlining, only, there is no real
+ need to ship it into ltrans partition.  */
+  lto_output_tree_or_ref (ob, flag_wpa ? NULL : BINFO_VIRTUALS (expr), ref_p);
   lto_output_tree_or_ref (ob, BINFO_VPTR_FIELD (expr), ref_p);
 
   output_uleb128 (ob, VEC_length (tree, BINFO_BASE_ACCESSES (expr)));


Re: [Patch : H8300] Bug fix for bit insn and minor tweaks to insns

2011-06-13 Thread Eric Botcazou
> 2011-06-09  Kaushik Phatak 
>
>   * config/h8300/h8300.md (bsetqi_msx, bclrqi_msx, bnotqi_msx): Added
>   condition to disallow non-identical memory locations.
>   (*andqi3_2, andqi3_1, iorqi3_1, xorqi3_1): Reorder insn to give
>   preference to bit manipulation instructions.
>   * gcc.dg/h8300-bit-insn-ice2.2: New testcase.

The testsuite has its own ChangeLog file, please move the last line to it.

-- 
Eric Botcazou


Re: [Patch : H8300] Bug fix for bit insn and minor tweaks to insns

2011-06-13 Thread H.J. Lu
On Mon, Jun 13, 2011 at 2:40 AM, Kaushik Phatak
 wrote:
> Hi Jeff,
> Thanks for the quick review.
>
>>> the right test is rtx_equal_p(operands[0], operands[1])
> Committed with above changes to the bsetqi_msx, bclrqi_msx and bnotqi_msx 
> patterns.
>

Commit is incorrect.  Testsuite ChangeLog entries should be in
testsuite/ChangeLog.
You have duplicated lines in gcc.dg/h8300-bit-insn-ice2.c and filename
is ChangeLog
is incorrect.  I checked in this fix.

-- 
H.J.
---
Index: ChangeLog
===
--- ChangeLog   (revision 174986)
+++ ChangeLog   (working copy)
@@ -4,7 +4,6 @@
condition to disallow non-identical memory locations.
(*andqi3_2, andqi3_1, iorqi3_1, xorqi3_1): Reorder insn to give
preference to bit manipulation instructions.
-   * gcc.dg/h8300-bit-insn-ice2.2: New testcase.

 2011-06-13  Jan Hubicka  

Index: testsuite/gcc.dg/h8300-bit-insn-ice2.c
===
--- testsuite/gcc.dg/h8300-bit-insn-ice2.c  (revision 174986)
+++ testsuite/gcc.dg/h8300-bit-insn-ice2.c  (working copy)
@@ -13,18 +13,3 @@ main (void)
   MSTPCRA = MSTPCRA2 | ~0xFE;
   return 0;
 }
-/* { dg-skip-if "" { "h8300*-*-*" } "*" "-msx*" }  */
-/* { dg-options "-O2" } */
-/* ICE for bit instruction generation using 16-bit const */
-
-#define MSTPCRA (*(volatile unsigned char*)0xC9)
-#define MSTPCRA2 (*(volatile unsigned char*)0xFFFDC8)
-
-int
-main (void)
-{
-  MSTPCRA = MSTPCRA2 & ~0x01;
-  MSTPCRA = MSTPCRA2 ^ ~0xFE;
-  MSTPCRA = MSTPCRA2 | ~0xFE;
-  return 0;
-}
Index: testsuite/ChangeLog
===
--- testsuite/ChangeLog (revision 174986)
+++ testsuite/ChangeLog (working copy)
@@ -1,3 +1,11 @@
+2011-06-13  H.J. Lu  
+
+   * gcc.dg/h8300-bit-insn-ice2.c: Remove duplicated lines.
+
+2011-06-13  Kaushik Phatak 
+
+   * gcc.dg/h8300-bit-insn-ice2.c: New testcase.
+
 2011-06-13  Thomas Koenig  

* gfortran.dg/trim_optimize_8.f90:  New test case.


Cgraph alias reorg 20/14 (inliner and same comdat groups)

2011-06-13 Thread Jan Hubicka
Hi,
this patch makes inliner to handle removal of offline copies of functions in
the same comdat groups.  Since aliases are now functions and a lot of comdat
functions do have aliases, we end up having a lot more comdat groups than 
before.

Bootstrapped/regtested x86_64-linux, comitted.

Honza

* ipa-inline-transform.c (can_remove_node_now_p_1): Break out from...
(can_remove_node_now_p): ... here; handle same comdat groups.
(clone_inlined_nodes): Update use of can_remove_node_now_p add TODO.
(inline_call): Update use of can_remove_node_now_p.
Index: ipa-inline-transform.c
===
--- ipa-inline-transform.c  (revision 174970)
+++ ipa-inline-transform.c  (working copy)
@@ -83,7 +83,7 @@ update_noncloned_frequencies (struct cgr
copy of function was removed.  */
 
 static bool
-can_remove_node_now_p (struct cgraph_node *node)
+can_remove_node_now_p_1 (struct cgraph_node *node)
 {
   /* FIXME: When address is taken of DECL_EXTERNAL function we still
  can remove its offline copy, but we would need to keep unanalyzed node in
@@ -98,15 +98,34 @@ can_remove_node_now_p (struct cgraph_nod
  && (!DECL_VIRTUAL_P (node->decl)
  || (!DECL_COMDAT (node->decl)
  && !DECL_EXTERNAL (node->decl)))
- /* Don't reuse if more than one function shares a comdat group.
-If the other function(s) are needed, we need to emit even
-this function out of line.  */
- && !node->same_comdat_group
  /* During early inlining some unanalyzed cgraph nodes might be in the
 callgraph and they might reffer the function in question.  */
  && !cgraph_new_nodes);
 }
 
+/* We are going to eliminate last direct call to NODE (or alias of it) via 
edge E.
+   Verify that the NODE can be removed from unit and if it is contained in 
comdat
+   group that the whole comdat group is removable.  */
+
+static bool
+can_remove_node_now_p (struct cgraph_node *node, struct cgraph_edge *e)
+{
+  struct cgraph_node *next;
+  if (!can_remove_node_now_p_1 (node))
+return false;
+
+  /* When we see same comdat group, we need to be sure that all
+ items can be removed.  */
+  if (!node->same_comdat_group)
+return true;
+  for (next = node->same_comdat_group;
+   next != node; next = next->same_comdat_group)
+if (node->callers && node->callers != e
+   && !can_remove_node_now_p_1 (node))
+  return false;
+  return true;
+}
+
 
 /* E is expected to be an edge being inlined.  Clone destination node of
the edge and redirect it to the new clone.
@@ -128,8 +147,15 @@ clone_inlined_nodes (struct cgraph_edge 
  /* Recursive inlining never wants the master clone to
 be overwritten.  */
  && update_original
- && can_remove_node_now_p (e->callee))
+ && can_remove_node_now_p (e->callee, e))
{
+ /* TODO: When callee is in a comdat group, we could remove all of it,
+including all inline clones inlined into it.  That would however
+need small function inlining to register edge removal hook to
+maintain the priority queue.
+
+For now we keep the ohter functions in the group in program until
+cgraph_remove_unreachable_functions gets rid of them.  */
  gcc_assert (!e->callee->global.inlined_to);
  if (e->callee->analyzed && !DECL_EXTERNAL (e->callee->decl))
{
@@ -200,7 +226,7 @@ inline_call (struct cgraph_edge *e, bool
   while (alias && alias != callee)
{
  if (!alias->callers
- && can_remove_node_now_p (alias))
+ && can_remove_node_now_p (alias, e))
{
  next_alias = cgraph_alias_aliased_node (alias);
  cgraph_remove_node (alias);


Commit: Add support for Tile architecutres to configure.ac

2011-06-13 Thread Nick Clifton
Hi Guys,

  I am checking in the patch below to add recognition of the Tilera TILE
  architectures to the top level configure script.  A patch to support
  these architectures has just been committed to the binutils sources.

Cheers
  Nick

./ChangeLog
2011-06-10  Walter Lee 

* configure.ac (tilepro-*-*) New case.
(tilegx-*-*): Likewise.
* configure: Regenerate.
 
Index: configure.ac
===
RCS file: /cvs/src/src/configure.ac,v
retrieving revision 1.154
diff -u -3 -p -r1.154 configure.ac
--- configure.ac9 May 2011 17:56:47 -   1.154
+++ configure.ac13 Jun 2011 15:19:11 -
@@ -1054,6 +1054,9 @@ case "${target}" in
   tic6x-*-*)
 noconfigdirs="$noconfigdirs gdb sim"
 ;;
+  tilepro-*-* | tilegx-*-*)
+noconfigdirs="$noconfigdirs sim"
+;;
   v810-*-*)
 noconfigdirs="$noconfigdirs bfd binutils gas gdb ld opcodes 
target-libgloss"
 ;;


Re: Typed DWARF stack and convert to untyped

2011-06-13 Thread Tom Tromey
> "Jakub" == Jakub Jelinek  writes:

Jakub> I'd like to propose convert to untyped operation, e.g.
Jakub> DW_OP_GNU_convert <0> could do it (and maybe DW_OP_GNU_reinterpret <0>),
Jakub> these would convert to an integral value of the same size as DWARF
Jakub> address and make it untyped.  As DW_OP_GNU_convert operand is
Jakub> uleb128 DIE offset within current CU, offset 0 certainly won't
Jakub> contain any DIEs, because it is the first byte of the CU header.

FWIW, I wrote the GDB change for this.  It is ready to go in whenever
this patch is accepted for GCC.

Tom


Re: [RFA] [PR44618] [PowerPC] Wrong code for -frename-registers

2011-06-13 Thread edmar

David,

I realized I did not "reply to all" when I first got your email.
You may have my email in your spam filter.

The issue is that I don't have WAA, and as Richard Guenther suggestion,
I want to ask if you could to do the commit for Freescale.

Thanks in advance for your patience.

In reference to: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=44618

Regards,
Edmar


On 05/25/2011 09:43 AM, David Edelsohn wrote:

On Mon, May 23, 2011 at 5:53 PM, edmar  wrote:

I completed re-testing everything.
It turns out I cannot reproduce the original error on gcc-4.4 (rev 173968)
So, I am submitting only the patch that I tested for gcc-4.5/4.6/4.7

Regression tested for e500mc target on:
4.5: Revision: 173928
4.6: Revision: 173936
trunk: Revision: 173966

The patch gcc.fix_rnreg4 applies directly to 4.6, 4.7 (1 line offset),
and 4.5 (-632 lines offset)

Are you re-asking for approval?

The patch is okay.

Thanks, David

P.S. Please include the ChangeLog entry inline in the email message
and attach the patch to the email if it is large.  No tar files.

.






Cgraph alias reorg 21/14 (topological sorting for early opts)

2011-06-13 Thread Jan Hubicka
Hi.
this patch makes ipa_reverse_postorder to walk the alias references improving
effectivit of early opts on units with aliases.

Bootstrapped/regtested x86_64-linux, comitted.

Honza

* ipa-utils.c (postorder_stack): New structure.
(ipa_reverse_postorder): Handle aliases.

Index: ipa-utils.c
===
--- ipa-utils.c (revision 174958)
+++ ipa-utils.c (working copy)
@@ -233,6 +233,13 @@ ipa_free_postorder_info (void)
 }
 }
 
+struct postorder_stack
+{
+  struct cgraph_node *node;
+  struct cgraph_edge *edge;
+  int ref;
+};
+
 /* Fill array order with all nodes with output flag set in the reverse
topological order.  Return the number of elements in the array.
FIXME: While walking, consider aliases, too.  */
@@ -243,11 +250,12 @@ ipa_reverse_postorder (struct cgraph_nod
   struct cgraph_node *node, *node2;
   int stack_size = 0;
   int order_pos = 0;
-  struct cgraph_edge *edge, last;
+  struct cgraph_edge *edge;
   int pass;
+  struct ipa_ref *ref;
 
-  struct cgraph_node **stack =
-XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
+  struct postorder_stack *stack =
+XCNEWVEC (struct postorder_stack, cgraph_n_nodes);
 
   /* We have to deal with cycles nicely, so use a depth first traversal
  output algorithm.  Ignore the fact that some functions won't need
@@ -261,47 +269,51 @@ ipa_reverse_postorder (struct cgraph_nod
  && (pass
  || (!node->address_taken
  && !node->global.inlined_to
- && !cgraph_only_called_directly_or_aliased_p (node
+ && !node->alias && !node->thunk.thunk_p
+ && !cgraph_only_called_directly_p (node
{
- node2 = node;
- if (!node->callers)
-   node->aux = &last;
- else
-   node->aux = node->callers;
- while (node2)
+ stack_size = 0;
+  stack[stack_size].node = node;
+ stack[stack_size].edge = node->callers;
+ stack[stack_size].ref = 0;
+ node->aux = (void *)(size_t)1;
+ while (stack_size >= 0)
{
- while (node2->aux != &last)
+ while (true)
{
- edge = (struct cgraph_edge *) node2->aux;
- if (edge->next_caller)
-   node2->aux = edge->next_caller;
- else
-   node2->aux = &last;
- /* Break possible cycles involving always-inline
-functions by ignoring edges from always-inline
-functions to non-always-inline functions.  */
- if (DECL_DISREGARD_INLINE_LIMITS (edge->caller->decl)
- && !DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl))
-   continue;
- if (!edge->caller->aux)
+ node2 = NULL;
+ while (stack[stack_size].edge && !node2)
{
- if (!edge->caller->callers)
-   edge->caller->aux = &last;
- else
-   edge->caller->aux = edge->caller->callers;
- stack[stack_size++] = node2;
+ edge = stack[stack_size].edge;
  node2 = edge->caller;
- break;
+ stack[stack_size].edge = edge->next_caller;
+ /* Break possible cycles involving always-inline
+functions by ignoring edges from always-inline
+functions to non-always-inline functions.  */
+ if (DECL_DISREGARD_INLINE_LIMITS (edge->caller->decl)
+ && !DECL_DISREGARD_INLINE_LIMITS
+   (cgraph_function_node (edge->callee, NULL)->decl))
+   node2 = NULL;
+   }
+ for (;ipa_ref_list_refering_iterate 
(&stack[stack_size].node->ref_list,
+  stack[stack_size].ref,
+  ref) && !node2;
+  stack[stack_size].ref++)
+   {
+ if (ref->use == IPA_REF_ALIAS)
+   node2 = ipa_ref_refering_node (ref);
+   }
+ if (!node2)
+   break;
+ if (!node2->aux)
+   {
+ stack[++stack_size].node = node2;
+ stack[stack_size].edge = node2->callers;
+ stack[stack_size].ref = 0;
+ node2->aux = (void *)(size_t)1;
}
}
- if (node2->aux == &last)
-   {
- order[order_pos++] = node2;
- if (stack_size)
-   node2 = stack[--stack_size];
- else
-   node2 = NULL;
-   

Re: Cgraph alias reorg 8/14 (ipa-cp and ipa-prop update)

2011-06-13 Thread Martin Jambor
Hi,

On Fri, Jun 10, 2011 at 04:55:43PM +0200, Jan Hubicka wrote:
> Hi,

> this patch updated ipa-cp and ipa-prop for aliases.  It is basically
> easy - we don't analyze nodes represneting aliases and when
> propagating we skip them, like everywhere else.
> 
> There are two problems I noticed.  First we should not propagate
> through calls that are overwritable.  When non-overwritable function
> A has overwritable alias B, we should propagate through calls to A,
> but not throug calls to B.  As discussed on IRC it is probably best
> to zap the jump functions in question at IPA stage (because at
> analysis stage it is not clear at all if the alias will end up
> overwritable with LTO).
> 

OK.  Nevertheless, I'd prefer to do this in context of the new
IPA-CP.  

> Similar problem already exists with code in
> ipa_compute_jump_functions that looks into a callee that might
> change with LTO.

I either don't understand or fail to see how this is different from
the first problem.  We even compute jump functions of indirect edges
precisely because we hope they will be changed later on...


> Index: ipa-cp.c
> ===
> --- ipa-cp.c  (revision 174905)
> +++ ipa-cp.c  (working copy)
> @@ -818,7 +828,7 @@ ipcp_iterate_stage (void)
>  /* Some lattices have changed from IPA_TOP to IPA_BOTTOM.
> This change should be propagated.  */
>  {
> -  gcc_assert (n_cloning_candidates);
> +  /*gcc_assert (n_cloning_candidates);*/
>ipcp_propagate_stage ();
>  }
>if (dump_file)


I know this assert can be horribly irritating but so far it has been
very useful at spotting all kinds of errors at various places.  (In
fact, you added it :-)

But as I want to get the whole IPA-CP replaced, I don't care all that
much.

Martin


Re: RFA (fold): PATCH for c++/49290 (folding *(T*)(ar+10))

2011-06-13 Thread Jason Merrill

On 06/13/2011 06:51 AM, Richard Guenther wrote:

But I suppose you want the array-ref be folded to a constant eventually?


Right.

I'm not going to keep arguing about VIEW_CONVERT_EXPR, but that brings 
me back to my original question: is it OK to add a permissive mode to 
the function, or should I copy the whole thing into the front end?


Jason


Re: [testsuite]: Skip tests for targets with int < 32 bits

2011-06-13 Thread Mike Stump
On Jun 9, 2011, at 9:27 AM, Georg-Johann Lay wrote:
> This patch fixes testsuite failures because the testcases assume
> sizeof(int) >= 4.
> 
>   * gcc.c-torture/compile/pr49029.c: Add dg-require-effective-target
>   int32plus
>   * gcc.c-torture/compile/pr49163.c: Ditto.

Well, since no one objected to it, I'll approve it.  I was hoping that a 
specific maintainer would approve or reject it.  :-(


[patch, C++] Fix g++.dg/cpp0x/nullptr21.C on IA64 HP-UX

2011-06-13 Thread Steve Ellcey
The test g++.dg/cpp0x/nullptr21.C fails on IA64 HP-UX in 32 bit mode and
in looking at the problem I tracked it down to some code in
cxx_init_decl_processing that was added in r159131 to create nullptr_node:

2010-05-06  Magnus Fromreide  
Jason Merrill  

Add support for C++0x nullptr.

The type for nullptr_node is nullptr_type_node and it is set up with a type,
size, and alignment of ptr_mode.  But the actual mode set with SET_TYPE_MODE
is set to Pmode instead of ptr_mode.  I don't know why, it may just be a typo.

This causes problems on IA64 HP-UX in 32 bit mode because ptr_mode is 4 bytes
(and 4 byte aligned) but Pmode is 8 bytes and 8 byte aligned.  The code
generated for nullptr21.C winds up doing an 8 byte load for nullptr on an
address that is only 4 btye aligned and the code seg faults with a misaligned
read.

This patch changes Pmode to ptr_mode in the SET_TYPE_MODE call and that fixes
the problem.  It also caused no regressions on IA64 HP-UX, IA64 Linux, or
x86 Linux.

OK for checkin?

Steve Ellcey
s...@cup.hp.com


2011-06-13  Steve Ellcey  

* decl.c (cxx_init_decl_processing): Use ptr_mode instead of Pmode.


Index: decl.c
===
--- decl.c  (revision 174979)
+++ decl.c  (working copy)
@@ -3672,7 +3672,7 @@
 TYPE_SIZE_UNIT (nullptr_type_node) = size_int (GET_MODE_SIZE (ptr_mode));
 TYPE_UNSIGNED (nullptr_type_node) = 1;
 TYPE_PRECISION (nullptr_type_node) = GET_MODE_BITSIZE (ptr_mode);
-SET_TYPE_MODE (nullptr_type_node, Pmode);
+SET_TYPE_MODE (nullptr_type_node, ptr_mode);
 record_builtin_type (RID_MAX, "decltype(nullptr)", nullptr_type_node);
 nullptr_node = build_int_cst (nullptr_type_node, 0);
   }


Re: [patch, C++] Fix g++.dg/cpp0x/nullptr21.C on IA64 HP-UX

2011-06-13 Thread Jason Merrill

OK.

Jason


Re: [gcc patch 1/3] cp-demangle.c: Make `options' a parameter

2011-06-13 Thread Ian Lance Taylor
On Thu, Jun 2, 2011 at 7:15 AM, Jan Kratochvil
 wrote:
>
> libiberty/
> 2011-05-24  Jan Kratochvil  
>
>        * cp-demangle.c (struct d_print_info): Remove field options.
>        (d_print_init): Remove parameter options.
>        (cplus_demangle_print_callback): Update all the callers.
>        (d_print_comp, d_print_mod_list, d_print_mod, d_print_function_type)
>        (d_print_array_type, d_print_expr_op, d_print_cast, d_print_subexpr):
>        Add parameter options, update all the callers.

>  static void
> -d_print_comp (struct d_print_info *, const struct demangle_component *);
> +d_print_comp (struct d_print_info *, const struct demangle_component *, int);

The new options parameter should be the second parameter, not the last.

>  static void
> -d_print_mod_list (struct d_print_info *, struct d_print_mod *, int);
> +d_print_mod_list (struct d_print_info *, struct d_print_mod *, int, int);

Likewise.

>  static void
> -d_print_mod (struct d_print_info *, const struct demangle_component *);
> +d_print_mod (struct d_print_info *, const struct demangle_component *, int);

Likewise.

>  static void
>  d_print_function_type (struct d_print_info *,
>                        const struct demangle_component *,
> -                       struct d_print_mod *);
> +                       struct d_print_mod *, int);

Likewise.

>  static void
>  d_print_array_type (struct d_print_info *,
>                     const struct demangle_component *,
> -                    struct d_print_mod *);
> +                    struct d_print_mod *, int);

Likewise.

>  static void
> -d_print_expr_op (struct d_print_info *, const struct demangle_component *);
> +d_print_expr_op (struct d_print_info *, const struct demangle_component *, 
> int);

Likewise.

>  static void
> -d_print_cast (struct d_print_info *, const struct demangle_component *);
> +d_print_cast (struct d_print_info *, const struct demangle_component *, int);

Likewise.

You will have to change the function definitions and calls accordingly.

This is OK with those changes.

Thanks.

Ian


Re: [Patch, AVR]: Fix PR46779

2011-06-13 Thread Georg-Johann Lay

[In CCing Richard Henderson]

Denis Chertykov schrieb:

2011/6/10 Georg-Johann Lay :


Then I have a question on spill failures:

There is PR46278, an optimization flaw that goes as follows:

The avr BE defines fake addressing mode X+const that has to be written
down in asm as
X += const
a = *X
X -= const

The comment says that this is just to cover a corner case, however the
new register allocator uses this case more often or even greedily.
There is no way to describe the cost for such an access, and as X has
lower prologue/epilogue cost than Y, X is preferred over Y often.

In 4.7, I see that flaw less often than in 4.5.  However, I think the
best way is not to lie at the register allocator and not to supply a
fake instruction like that.

So I started to work on a fix. The changes in avr.h are:

  * config/avr/avr.h (BASE_REG_CLASS): Remove.
  (REG_OK_FOR_BASE_NOSTRICT_P): Remove.
  (REG_OK_FOR_BASE_STRICT_P): Remove.
  (MODE_CODE_BASE_REG_CLASS): New Define.
  (REGNO_MODE_CODE_OK_FOR_BASE_P): New Define.

The macros REGNO_MODE_CODE_OK_FOR_BASE_P and MODE_CODE_BASE_REG_CLASS
allow a better, fine grained control over addressing modes for each
hard register and allow to support X without fake instructions. The
code quality actually improves, but I see a new spill failure for the
test case

* gcc.c-torture/compile/950612-1.c

On the one hand, that test case has heavy long long use and is no
"real world code" to run on AVR. On the other hand, I don't think
trading code quality here against ICE there is a good idea.

What do you think about that? As I have no idea how to fix a spill
failure in the backend, I stopped working on a patch.


Ohhh. It's a most complicated case for GCC for AVR.


So you think is is pointless/discouraged to give a more realistic 
description of AVR addressing be means of MODE_CODE_BASE_REG_CLASS 
(instead of BASE_REG_CLASS) resp. REGNO_MODE_CODE_OK_FOR_BASE_P?



Look carefully at `out_movqi_r_mr'.
There are even two fake addressing modes:
1. [Y + infinite-dslacement];
2. [X + (0...63)].


Yes, I know. The first is introduced by avr_legitimate_address_p and the 
second appears to be artifact of LEGITIMIZE_RELOAD_ADDRESS.


The changes are basically MODE_CODE_BASE_REG_CLASS (introduced in 4.2) 
and a rewrite of avr_legitimate_address_p. The changes aim at a better 
addressing for X and to minimize fake addresses.



I have spent a many hours (days, months) to debug GCC (especially avr port
and reload) for right addressing modes.
I have stopped on this code.
AVR have a limited memory addressing and GCC can't handle it in native form.
Because of that I have supported a fake adddressing modes.


I assume the code is from prior to 4.2 when 
REGNO_MODE_CODE_OK_FOR_BASE_P and MODE_CODE_BASE_REG_CLASS had not been 
available so that supporting X required some hacking.
All that would still be fine; however the new register allocator leads 
to code that noone would accept. Accessing a structure through a pointer 
is not uncommon, not even on AVR. So if Z is used for, say accessing 
flash, X appears to be the best register.


The shortcoming in GCC is that there is no way to give costs of 
addressing (TARGET_ADDRESS_COST does different things).


So take a look what avr-gcc compiles here:
  http://gcc.gnu.org/bugzilla/attachment.cgi?id=22242
I saw similar complains in forums on the web.


(Richard Henderson have a different opinion: GCC can, AVR port can't)


What does he mean with that?


IMHO that three limited pointer registers is not enough for C compiler.
Even more with frame pointer it's only two and X is a very limited.


The current implementation has several oddities like

* allowing SUBREGs in avr-legitimate_address_p
* changing BASE_REG_CLASS on the fly (by means of reload_completed)

isn't that supposed to cause trouble?


1. [Y + infinite-dslacement] it's helper for reload addressing.
For addressing too long local/spilled variable. (Y + more-than-63)

2. [X + (0...63)] for another one "normal" pointer address.





Then I observed trouble with DI patterns during libgcc build and had
to remove

* "zero_extendqidi2"
* "zero_extendhidi2"
* "zero_extendsidi2"

These are "orphan" insns: they deal with DI without having movdi
support so I removed them.


This seems strange for me.


As far as I know, to support a mode a respective mov insn is needed, 
which is not the case for DI. I don't know the exact rationale behind 
that (reloading?), just read is on gcc list by Ian Taylor (and also that 
it is stronly discouraged to have more than one mov insn per mode).


So if the requirement to have mov insn is dropped and without the burden 
to implement movdi, it would be rather easy to implement adddi3 and 
subdi3 for avr...


Johann


Denis.




Re: [gcc patch 2/3] cp-demangle.c: New DMGL_RET_DROP

2011-06-13 Thread Ian Lance Taylor
On Thu, Jun 2, 2011 at 7:16 AM, Jan Kratochvil
 wrote:
>
> include/
> 2011-05-24  Jan Kratochvil  
>
>        * demangle.h (DMGL_RET_POSTFIX): Extend the comment.
>        (DMGL_RET_DROP): New.
>
> libiberty/
> 2011-05-24  Jan Kratochvil  
>
>        * cp-demangle.c (d_print_comp) : Do
>        not pass DMGL_RET_POSTFIX or DMGL_RET_DROP.  Support DMGL_RET_DROP.
>        * testsuite/demangle-expected: New testcases for --ret-drop.
>        * testsuite/test-demangle.c: Document --ret-drop in a comment.
>        (main): New variable ret_drop, fill it, call cplus_demangle with it.

This is OK.

Thanks.

Ian


Re: [gcc patch 3/3] cp-demangle.c: Fix DMGL_RET_POSTFIX for inner func types

2011-06-13 Thread Ian Lance Taylor
On Thu, Jun 2, 2011 at 7:17 AM, Jan Kratochvil
 wrote:
>
> libiberty/
> 2011-05-24  Jan Kratochvil  
>
>        * cp-demangle.c (d_print_comp) :
>        Suppress d_print_mod for DMGL_RET_POSTFIX.
>        * testsuite/demangle-expected: New testcases for --ret-postfix.

This is OK.

Thanks.

Ian


Re: [testsuite]: Skip tests for targets with int < 32 bits

2011-06-13 Thread Georg-Johann Lay

Mike Stump schrieb:

On Jun 9, 2011, at 9:27 AM, Georg-Johann Lay wrote:


This patch fixes testsuite failures because the testcases assume
sizeof(int) >= 4.

* gcc.c-torture/compile/pr49029.c: Add dg-require-effective-target
int32plus
* gcc.c-torture/compile/pr49163.c: Ditto.



Well, since no one objected to it, I'll approve it.  I was hoping that a 
specific maintainer would approve or reject it.  :-(


Who is a "specific maintainer" here?
I found you (and Rainer Orth) as "testsuite" maintainers.
Or does that just refer to the test harness and not to specific test cases?

I CCed Kaz and Jakub because they added respective test cases.

Then I observed that
/* { dg-require-effective-target int32plus } */
does not work as intended for all test cases.

For exammple, I added this line to, e.g.
* gcc.c-torture/execute/cmpsi-2.c
* gcc.c-torture/execute/pr45262.c
in trunk r172757
http://gcc.gnu.org/viewcvs?view=revision&revision=172757

However, these tests are still executed as you can see in 
gcc-testresults for trunk r174959:

http://gcc.gnu.org/ml/gcc-testresults/2011-06/msg01429.html

Is this a bug resp. worth a bug report?

Johann


Re: [testsuite]: Skip tests for targets with int < 32 bits

2011-06-13 Thread Jakub Jelinek
On Mon, Jun 13, 2011 at 08:18:52PM +0200, Georg-Johann Lay wrote:
> For exammple, I added this line to, e.g.
> * gcc.c-torture/execute/cmpsi-2.c
> * gcc.c-torture/execute/pr45262.c
> in trunk r172757
> http://gcc.gnu.org/viewcvs?view=revision&revision=172757

That was a mistake.

gcc.c-torture/execute/ doesn't use the dg framework, you need
to instead add cmpsi-2.x resp. pr45262.x file alongside with
the testcase.  Look at other *.x files there for details on how they look
like.

Jakub


RFC: Fix GCSE exp_equiv_p on MEMs with different MEM_ATTRS (PR rtl-optimization/49390)

2011-06-13 Thread Jakub Jelinek
Hi!

As the testcase shows, the
http://gcc.gnu.org/ml/gcc-patches/2010-06/msg02945.html
patch looks wrong, MEM_ATTRS matters quite a lot for the
alias oracle, so ignoring it leads to miscompilations.

Instead of just reverting the patch, this patch attempts to add
some exceptions, notably when MEM_ATTRS are indirect with MEM_REF
containing an SSA_NAME as base, and the SSA_NAMEs have the same
var (maybe that check is unnecessary) and both SSA_NAMEs have the
same points-to info, we can consider them interchangeable.
But when they have different points-to info or if other details
are different, we need to play safe and return false from exp_equiv_p.

Bootstrapped/regtested on x86_64-linux and i686-linux.

Should the tests in rtx_mem_attrs_equiv_p be split into more
functions (e.g. have a points-to equiv inline function, and
perhaps most of the remaining body in tree-ssa-alias.c instead
of alias.c (like just do the ao_ref_from_mem/MEM_ATTRS/MEM_ALIGN
checks in there and keep the rest in the tree files)?

2011-06-13  Jakub Jelinek  

PR rtl-optimization/49390
* alias.c (rtx_mem_attrs_equiv_p): New function.
* alias.h (rtx_mem_attrs_equiv_p): New prototype.
* tree-ssa-alias.c (ao_ref_base_alias_set): No longer static.
* tree-ssa-alias.h (ao_ref_base_alias_set): New prototype.
* cse.c (exp_equiv_p) : If MEM_ATTRS are different,
call rtx_mem_attrs_equiv_p to see if MEM_ATTRS are interchangeable.

* gcc.c-torture/execute/pr49390.c: New test.

--- gcc/alias.c.jj  2011-05-24 23:34:28.0 +0200
+++ gcc/alias.c 2011-06-13 17:17:17.0 +0200
@@ -374,6 +374,79 @@ rtx_refs_may_alias_p (const_rtx x, const
 && MEM_ALIAS_SET (mem) != 0);
 }
 
+/* Return true if MEM_ATTRS of X and Y are equivalent for the
+   alias oracle.  */
+
+bool
+rtx_mem_attrs_equiv_p (const_rtx x, const_rtx y)
+{
+  ao_ref ref1, ref2;
+
+  if (MEM_ATTRS (x) == MEM_ATTRS (y))
+return true;
+
+  if (!ao_ref_from_mem (&ref1, x)
+  || !ao_ref_from_mem (&ref2, y))
+return false;
+
+  if (MEM_ALIGN (x) != MEM_ALIGN (y))
+return false;
+
+  if (ref1.base == NULL
+  || ref2.base == NULL
+  || ref1.ref == NULL
+  || ref2.ref == NULL
+  || MEM_ALIGN (x) != MEM_ALIGN (y)
+  || TREE_CODE (ref1.ref) != TREE_CODE (ref2.ref)
+  || TREE_CODE (ref1.base) != TREE_CODE (ref2.base)
+  || ref1.offset != ref2.offset
+  || ref1.size != ref2.size
+  || ref1.max_size != ref2.max_size
+  || ao_ref_alias_set (&ref1) != ao_ref_alias_set (&ref2)
+  || ao_ref_base_alias_set (&ref1) != ao_ref_base_alias_set (&ref2))
+return false;
+
+  if (TREE_CODE (ref1.base) == MEM_REF
+  && TREE_CODE (TREE_OPERAND (ref1.base, 0)) == SSA_NAME
+  && TREE_CODE (TREE_OPERAND (ref2.base, 0)) == SSA_NAME)
+{
+  tree v1 = TREE_OPERAND (ref1.base, 0);
+  tree v2 = TREE_OPERAND (ref2.base, 0);
+  struct ptr_info_def *pi1, *pi2;
+
+  if (SSA_NAME_VAR (v1) != SSA_NAME_VAR (v2)
+ || !types_compatible_p (TREE_TYPE (ref1.base), TREE_TYPE (ref2.base))
+ || TREE_OPERAND (ref1.base, 1) != TREE_OPERAND (ref2.base, 1))
+   return false;
+
+  pi1 = SSA_NAME_PTR_INFO (v1);
+  pi2 = SSA_NAME_PTR_INFO (v2);
+  if (pi1 == NULL || pi2 == NULL)
+   return pi1 == NULL && pi2 == NULL;
+  if (pi1->align != pi2->align
+ || pi1->misalign != pi2->misalign
+ || pi1->pt.anything != pi2->pt.anything
+ || pi1->pt.nonlocal != pi2->pt.nonlocal
+ || pi1->pt.escaped != pi2->pt.escaped
+ || pi1->pt.ipa_escaped != pi2->pt.ipa_escaped
+ || pi1->pt.null != pi2->pt.null
+ || pi1->pt.vars_contains_global != pi2->pt.vars_contains_global
+ || pi1->pt.vars_contains_restrict != pi2->pt.vars_contains_restrict)
+   return false;
+
+  if (pi1->pt.vars == NULL || pi2->pt.vars == NULL)
+   return pi1->pt.vars == NULL && pi2->pt.vars == NULL;
+
+  if (pi1->pt.vars == pi2->pt.vars
+ || bitmap_equal_p (pi1->pt.vars, pi2->pt.vars))
+   return true;
+
+  return false;
+}
+
+  return false;
+}
+
 /* Returns a pointer to the alias set entry for ALIAS_SET, if there is
such an entry, or NULL otherwise.  */
 
--- gcc/alias.h.jj  2011-01-06 10:21:52.0 +0100
+++ gcc/alias.h 2011-06-13 17:05:46.0 +0200
@@ -43,6 +43,7 @@ extern int alias_sets_conflict_p (alias_
 extern int alias_sets_must_conflict_p (alias_set_type, alias_set_type);
 extern int objects_must_conflict_p (tree, tree);
 extern int nonoverlapping_memrefs_p (const_rtx, const_rtx, bool);
+extern bool rtx_mem_attrs_equiv_p (const_rtx, const_rtx);
 
 /* This alias set can be used to force a memory to conflict with all
other memories, creating a barrier across which no memory reference
--- gcc/tree-ssa-alias.c.jj 2011-05-31 08:03:10.0 +0200
+++ gcc/tree-ssa-alias.c2011-06-13 17:16:36.0 +0200
@@ -489,7 +489,7 @@ ao_ref

Re: [testsuite]: Skip tests for targets with int < 32 bits

2011-06-13 Thread Mike Stump
On Jun 13, 2011, at 11:18 AM, Georg-Johann Lay wrote:
> Who is a "specific maintainer" here?

I'd be happy to have the author of the testcase weigh in, or someone that cares 
about int32plus, or the the person that fixed the bug in the compiler for which 
the testcase was written...

> I found you (and Rainer Orth) as "testsuite" maintainers.
> Or does that just refer to the test harness and not to specific test cases?

I consider the situation much like the role of a global write privs person.  
They, in theory, can approve a C++ change, but such changes are at least at 
times, better reviewed and approved by a C++ person.  I'd rather haver a arm 
person ok the gcc.target/arm/* testcases, I'd rather have an x86_64 person 
review and approve gcc.target/i386, I'd rather have a fortran person review 
gcc.fotran changes.

> Then I observed that
> /* { dg-require-effective-target int32plus } */
> does not work as intended for all test cases.

Ah, yes, right.  Longer term, I think that's a bug we should fix.  For now, as 
Jakub points out, you need to create a .x file for them.  I'd like to see the 
.x files go away.

> For exammple, I added this line to, e.g.
> * gcc.c-torture/execute/cmpsi-2.c
> * gcc.c-torture/execute/pr45262.c
> in trunk r172757
> http://gcc.gnu.org/viewcvs?view=revision&revision=172757
> 
> However, these tests are still executed as you can see in gcc-testresults for 
> trunk r174959:
> http://gcc.gnu.org/ml/gcc-testresults/2011-06/msg01429.html

I don't understand why you'd propose a change that doesn't work?  In general, 
maintainers rely upon contributors to test and ensure that a change does 
something worthwhile.  So, yes, I agree with Jakub, this part of the patch 
should be reverted, and an .x file created/modified, or possibly the testcase 
modified to be more portable.  If you want to enhance the driver to process the 
dg stuff, I like that direction.

> Is this a bug resp. worth a bug report?

If you want, though, I think it might make more sense in a forward looking 
design document, or a open projects file.  It isn't a bug, because it was never 
a feature.


Re: [testsuite]: Skip tests for targets with int < 32 bits

2011-06-13 Thread Jakub Jelinek
On Thu, Jun 09, 2011 at 06:27:56PM +0200, Georg-Johann Lay wrote:
> This patch fixes testsuite failures because the testcases assume
> sizeof(int) >= 4.
> 
>   * gcc.c-torture/compile/pr49029.c: Add dg-require-effective-target
>   int32plus
>   * gcc.c-torture/compile/pr49163.c: Ditto.

> Index: gcc.c-torture/compile/pr49029.c
> ===
> --- gcc.c-torture/compile/pr49029.c   (Revision 174701)
> +++ gcc.c-torture/compile/pr49029.c   (Arbeitskopie)
> @@ -1,3 +1,4 @@
> +/* { dg-require-effective-target int32plus } */
>  /* PR middle-end/49029 */
>  struct S { volatile unsigned f : 11; signed g : 30; } 
> __attribute__((packed));
>  struct T { volatile struct S h; } __attribute__((packed)) a;

Please keep the PR number comment as the first thing in the files.

> Index: gcc.c-torture/compile/pr49163.c
> ===
> --- gcc.c-torture/compile/pr49163.c   (Revision 174701)
> +++ gcc.c-torture/compile/pr49163.c   (Arbeitskopie)
> @@ -1,3 +1,4 @@
> +/* { dg-require-effective-target int32plus } */
>  /* PR target/49163 */
>  struct S1
>  {


Jakub


Re: Cgraph alias reorg 8/14 (ipa-cp and ipa-prop update)

2011-06-13 Thread Jan Hubicka
> Hi,
> 
> On Fri, Jun 10, 2011 at 04:55:43PM +0200, Jan Hubicka wrote:
> > Hi,
> 
> > this patch updated ipa-cp and ipa-prop for aliases.  It is basically
> > easy - we don't analyze nodes represneting aliases and when
> > propagating we skip them, like everywhere else.
> > 
> > There are two problems I noticed.  First we should not propagate
> > through calls that are overwritable.  When non-overwritable function
> > A has overwritable alias B, we should propagate through calls to A,
> > but not throug calls to B.  As discussed on IRC it is probably best
> > to zap the jump functions in question at IPA stage (because at
> > analysis stage it is not clear at all if the alias will end up
> > overwritable with LTO).
> > 
> 
> OK.  Nevertheless, I'd prefer to do this in context of the new
> IPA-CP.  

Yep, lets see how much work will be to get new ipa-cp in.  I will try
to look into it more this week.
> 
> > Similar problem already exists with code in
> > ipa_compute_jump_functions that looks into a callee that might
> > change with LTO.
> 
> I either don't understand or fail to see how this is different from
> the first problem.  We even compute jump functions of indirect edges
> precisely because we hope they will be changed later on...

Well, it is sort of similar. What is new that same function can be now
called in overwritable and non-overwritable way.  The old problem is that
what used to be overwritable at analysis time is not neccesarily so at WPA
time.
> 
> 
> > Index: ipa-cp.c
> > ===
> > --- ipa-cp.c(revision 174905)
> > +++ ipa-cp.c(working copy)
> > @@ -818,7 +828,7 @@ ipcp_iterate_stage (void)
> >  /* Some lattices have changed from IPA_TOP to IPA_BOTTOM.
> > This change should be propagated.  */
> >  {
> > -  gcc_assert (n_cloning_candidates);
> > +  /*gcc_assert (n_cloning_candidates);*/
> >ipcp_propagate_stage ();
> >  }
> >if (dump_file)
> 
> 
> I know this assert can be horribly irritating but so far it has been
> very useful at spotting all kinds of errors at various places.  (In
> fact, you added it :-)
> 
> But as I want to get the whole IPA-CP replaced, I don't care all that
> much.

I am pretty sure I didn't meant to remove it and had the whole thing tested 
with the assert.
The issue was that once aliases are skipped by propagate stage, the lattices 
stay top.
But I already fixed it elsewhere, so I will regtest reverting that hunk.  
Thanks for
pointing this out!

Honza
> 
> Martin


[pph] Fix GC ICE in g++.dg/pph/c1eabi.cc (issue4607047)

2011-06-13 Thread Diego Novillo

We were running into a GC ICE because when reading a sorted_fields_type
field we were never setting the lenght of the vector.

This patch moves the setting of the vector length to the actual
allocation function (sorted_fields_type_new).  It was being done in
finish_struct_1 before, so vectors read from the streamer were getting
garbage in the len field.

This fixes g++.dg/pph/c1eabi.{cc,h}.  Tested on x86_64.  Committed to
branch.


Diego.


cp/ChangeLog.pph

* class.c (sorted_fields_type_new): Set field LEN in the
newly allocated vector.
(finish_struct_1): Remove setting of FIELD_VEC->LEN.

testsuite/ChangeLog.pph

* g++.dg/pph/c1eabi1.h: Remove XFAIL markers.
* g++.dg/pph/c1eabi1.cc: Likewise.
Add assembly mis-comparison expectation.

diff --git a/gcc/cp/class.c b/gcc/cp/class.c
index 86ec04b..70a3668 100644
--- a/gcc/cp/class.c
+++ b/gcc/cp/class.c
@@ -5579,8 +5579,12 @@ determine_key_method (tree type)
 struct sorted_fields_type *
 sorted_fields_type_new (int n)
 {
-  return ggc_alloc_sorted_fields_type (sizeof (struct sorted_fields_type)
-  + n * sizeof (tree));
+  struct sorted_fields_type *sft;
+  sft = ggc_alloc_sorted_fields_type (sizeof (struct sorted_fields_type)
+ + n * sizeof (tree));
+  sft->len = n;
+
+  return sft;
 }
 
 
@@ -5714,7 +5718,6 @@ finish_struct_1 (tree t)
   if (n_fields > 7)
 {
   struct sorted_fields_type *field_vec = sorted_fields_type_new (n_fields);
-  field_vec->len = n_fields;
   add_fields_to_record_type (TYPE_FIELDS (t), field_vec, 0);
   qsort (field_vec->elts, n_fields, sizeof (tree),
 field_decl_cmp);
diff --git a/gcc/testsuite/g++.dg/pph/c1eabi1.cc 
b/gcc/testsuite/g++.dg/pph/c1eabi1.cc
index 6271501..b2e9b11 100644
--- a/gcc/testsuite/g++.dg/pph/c1eabi1.cc
+++ b/gcc/testsuite/g++.dg/pph/c1eabi1.cc
@@ -1,7 +1,4 @@
-// { dg-xfail-if "ICE" { "*-*-*" } { "-fpph-map=pph.map" } }
-// { dg-bogus "mathcalls.h:365:1: internal compiler error: Segmentation fault" 
"" { xfail *-*-* } 0 }
-// { dg-prune-output "In file included from " }
-// { dg-prune-output " from " }
 // { dg-options "-w -fpermissive" }
+// pph asm xdiff
 
 #include "c1eabi1.h"
diff --git a/gcc/testsuite/g++.dg/pph/c1eabi1.h 
b/gcc/testsuite/g++.dg/pph/c1eabi1.h
index 151b768..5f5b593 100644
--- a/gcc/testsuite/g++.dg/pph/c1eabi1.h
+++ b/gcc/testsuite/g++.dg/pph/c1eabi1.h
@@ -1,7 +1,3 @@
-// { dg-xfail-if "ICE" { "*-*-*" } { "-fpph-map=pph.map" } }
-// { dg-bogus "mathcalls.h:365:1: internal compiler error: Segmentation fault" 
"" { xfail *-*-* } 0 }
-// { dg-prune-output "In file included " }
-// { dg-prune-output " from " }
 // { dg-options "-w -fpermissive" }
 
 #ifndef __PPH_GUARD_H

--
This patch is available for review at http://codereview.appspot.com/4607047


Re: [google] Enhance Annotalysis to support cloned functions/methods (issue4591066)

2011-06-13 Thread lcwu


http://codereview.appspot.com/4591066/diff/3001/gcc/tree-threadsafe-analyze.c
File gcc/tree-threadsafe-analyze.c (right):

http://codereview.appspot.com/4591066/diff/3001/gcc/tree-threadsafe-analyze.c#newcode1159
gcc/tree-threadsafe-analyze.c:1159: gcc_assert (false);
On 2011/06/11 17:52:51, Diego Novillo wrote:

>+ else
>+  gcc_assert (false);
>+



Change to gcc_unreachable ();


Done.

http://codereview.appspot.com/4591066/diff/3001/gcc/tree-threadsafe-analyze.c#newcode2151
gcc/tree-threadsafe-analyze.c:2151: && gimple_call_num_args(call) > 0)
On 2011/06/11 17:52:51, Diego Novillo wrote:

2147  starting from GCC-4.5.). The clones could be created as

early as when

  2148  constructing SSA. Also note that the parameters of a cloned

method

could
  2149  be optimized away.  */
  2150   if (TREE_CODE (TREE_TYPE (DECL_ORIGIN (fdecl))) == METHOD_TYPE
  2151   && gimple_call_num_args(call) > 0)



Wouldn't it be easier to make fdecl == DECL_ORIGIN (fdecl) earlier in

the

function?



It's OK either way, though.


Yes, my original fix was to make fdecl = DECL_ORIGINAL (fdecl). But I
later changed it to this way because I wanted to tolerate the case where
the base object (i.e. "this" pointer) is an object instead of a pointer
only when fdecl is a clone. (i.e. I don't want to arbitrarily relax it.)
That's why I kept fdecl intact.

http://codereview.appspot.com/4591066/


Re: [google] Enhance Annotalysis to support cloned functions/methods (issue4591066)

2011-06-13 Thread Diego Novillo
On Mon, Jun 13, 2011 at 12:44,   wrote:

>> could
>>  2149      be optimized away.  */
>>  2150   if (TREE_CODE (TREE_TYPE (DECL_ORIGIN (fdecl))) == METHOD_TYPE
>>  2151       && gimple_call_num_args(call) > 0)
>
>> Wouldn't it be easier to make fdecl == DECL_ORIGIN (fdecl) earlier in
>
> the
>>
>> function?
>
>> It's OK either way, though.
>
> Yes, my original fix was to make fdecl = DECL_ORIGINAL (fdecl). But I
> later changed it to this way because I wanted to tolerate the case where
> the base object (i.e. "this" pointer) is an object instead of a pointer
> only when fdecl is a clone. (i.e. I don't want to arbitrarily relax it.)
> That's why I kept fdecl intact.

Ah, makes sense.  Thanks.


Diego.


Re: [Patch, AVR]: Fix PR46779

2011-06-13 Thread Denis Chertykov
2011/6/13 Georg-Johann Lay :
>
> So you think is is pointless/discouraged to give a more realistic
> description of AVR addressing be means of MODE_CODE_BASE_REG_CLASS (instead
> of BASE_REG_CLASS) resp. REGNO_MODE_CODE_OK_FOR_BASE_P?
>
>> Look carefully at `out_movqi_r_mr'.
>> There are even two fake addressing modes:
>> 1. [Y + infinite-dslacement];
>> 2. [X + (0...63)].
>
> Yes, I know. The first is introduced by avr_legitimate_address_p and the
> second appears to be artifact of LEGITIMIZE_RELOAD_ADDRESS.
>
> The changes are basically MODE_CODE_BASE_REG_CLASS (introduced in 4.2) and a
> rewrite of avr_legitimate_address_p. The changes aim at a better addressing
> for X and to minimize fake addresses.
>
>> I have spent a many hours (days, months) to debug GCC (especially avr port
>> and reload) for right addressing modes.
>> I have stopped on this code.
>> AVR have a limited memory addressing and GCC can't handle it in native
>> form.
>> Because of that I have supported a fake adddressing modes.
>
> I assume the code is from prior to 4.2 when REGNO_MODE_CODE_OK_FOR_BASE_P
> and MODE_CODE_BASE_REG_CLASS had not been available so that supporting X
> required some hacking.
> All that would still be fine; however the new register allocator leads to
> code that noone would accept. Accessing a structure through a pointer is not
> uncommon, not even on AVR. So if Z is used for, say accessing flash, X
> appears to be the best register.
>
> The shortcoming in GCC is that there is no way to give costs of addressing
> (TARGET_ADDRESS_COST does different things).
>
> So take a look what avr-gcc compiles here:
>  http://gcc.gnu.org/bugzilla/attachment.cgi?id=22242
> I saw similar complains in forums on the web.
>
>> (Richard Henderson have a different opinion: GCC can, AVR port can't)
>
> What does he mean with that?
>
>> IMHO that three limited pointer registers is not enough for C compiler.
>> Even more with frame pointer it's only two and X is a very limited.
>
> The current implementation has several oddities like
>
> * allowing SUBREGs in avr-legitimate_address_p
> * changing BASE_REG_CLASS on the fly (by means of reload_completed)
>
> isn't that supposed to cause trouble?

You can try to remove all oddities and check results.
Definitely something changed in GCC core since I wrote addressing code.

>>>
>>> * "zero_extendqidi2"
>>> * "zero_extendhidi2"
>>> * "zero_extendsidi2"
>>>
>>> These are "orphan" insns: they deal with DI without having movdi
>>> support so I removed them.
>>
>> This seems strange for me.
>
> As far as I know, to support a mode a respective mov insn is needed, which
> is not the case for DI. I don't know the exact rationale behind that
> (reloading?), just read is on gcc list by Ian Taylor (and also that it is
> stronly discouraged to have more than one mov insn per mode).
>
> So if the requirement to have mov insn is dropped and without the burden to
> implement movdi, it would be rather easy to implement adddi3 and subdi3 for
> avr...

Personally, I don't like to maintain 64bits integers in AVR port, but
may be somebody use it.

Denis.


Re: [4.6 PATCH] Workaround for stack slot sharing problems with unrolling (PR fortran/49103)

2011-06-13 Thread Jakub Jelinek
On Tue, Jun 07, 2011 at 12:24:06PM +0200, Richard Guenther wrote:
> Probably easier and more complete to do
> 
> if (lhs && TREE_CODE (lhs) != SSA_NAME)
>   {
>  tree base = get_base_address (lhs);

Done in the patch below, bootstrapped/regtested again on x86_64-linux and
i686-linux on the 4.6 branch.

> I don't like the patch too much, but it looks reasonable.  At least reverting
> your patch doesn't really fix anything.
> 
> Any opinions from others?

Michael said the same, anyone else has any opinion or can I check it in for
4.6?

2011-06-13  Jakub Jelinek  

PR fortran/49103
* tree.h (DECL_NONSHAREABLE): Define.
(struct tree_decl_common): Change decl_common_unused to
decl_nonshareable_flag.
* cfgexpand.c (expand_used_vars_for_block, clear_tree_used):
Ignore vars with DECL_NONSHAREABLE bit set.
* tree-cfg.c (gimple_duplicate_bb): Set DECL_NONSHAREABLE
on stores to automatic aggregate vars.

* gfortran.dg/pr49103.f90: New test.

--- gcc/tree.h.jj   2011-03-14 14:12:15.0 +0100
+++ gcc/tree.h  2011-05-31 14:05:34.0 +0200
@@ -1330,6 +1330,10 @@ extern void omp_clause_range_check_faile
 #define DECL_READ_P(NODE) \
   (TREE_CHECK2 (NODE, VAR_DECL, PARM_DECL)->decl_common.decl_read_flag)
 
+#define DECL_NONSHAREABLE(NODE) \
+  (TREE_CHECK2 (NODE, VAR_DECL, \
+   RESULT_DECL)->decl_common.decl_nonshareable_flag)
+
 /* In a CALL_EXPR, means that the call is the jump from a thunk to the
thunked-to function.  */
 #define CALL_FROM_THUNK_P(NODE) (CALL_EXPR_CHECK (NODE)->base.protected_flag)
@@ -2787,8 +2791,9 @@ struct GTY(()) tree_decl_common {
  being set.  */
   unsigned decl_read_flag : 1;
 
-  /* Padding so that 'off_align' can be on a 32-bit boundary.  */
-  unsigned decl_common_unused : 1;
+  /* In VAR_DECL or RESULT_DECL set when significant code movement precludes
+ attempting to share the stack slot with some other variable.  */
+  unsigned decl_nonshareable_flag : 1;
 
   /* DECL_OFFSET_ALIGN, used only for FIELD_DECLs.  */
   unsigned int off_align : 8;
--- gcc/cfgexpand.c.jj  2011-05-04 10:46:52.0 +0200
+++ gcc/cfgexpand.c 2011-05-31 14:08:36.0 +0200
@@ -1134,7 +1134,9 @@ expand_used_vars_for_block (tree block, 
 
   /* Expand all variables at this level.  */
   for (t = BLOCK_VARS (block); t ; t = DECL_CHAIN (t))
-if (TREE_USED (t))
+if (TREE_USED (t)
+&& ((TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != RESULT_DECL)
+   || !DECL_NONSHAREABLE (t)))
   expand_one_var (t, toplevel, true);
 
   this_sv_num = stack_vars_num;
@@ -1167,6 +1169,8 @@ clear_tree_used (tree block)
 
   for (t = BLOCK_VARS (block); t ; t = DECL_CHAIN (t))
 /* if (!TREE_STATIC (t) && !DECL_EXTERNAL (t)) */
+if ((TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != RESULT_DECL)
+   || !DECL_NONSHAREABLE (t))
   TREE_USED (t) = 0;
 
   for (t = BLOCK_SUBBLOCKS (block); t ; t = BLOCK_CHAIN (t))
--- gcc/tree-cfg.c.jj   2011-03-14 14:12:15.0 +0100
+++ gcc/tree-cfg.c  2011-06-13 19:34:18.0 +0200
@@ -5117,6 +5117,7 @@ gimple_duplicate_bb (basic_block bb)
 {
   def_operand_p def_p;
   ssa_op_iter op_iter;
+  tree lhs;
 
   stmt = gsi_stmt (gsi);
   if (gimple_code (stmt) == GIMPLE_LABEL)
@@ -5130,6 +5131,24 @@ gimple_duplicate_bb (basic_block bb)
   maybe_duplicate_eh_stmt (copy, stmt);
   gimple_duplicate_stmt_histograms (cfun, copy, cfun, stmt);
 
+  /* When copying around a stmt writing into a local non-user
+aggregate, make sure it won't share stack slot with other
+vars.  */
+  lhs = gimple_get_lhs (stmt);
+  if (lhs && TREE_CODE (lhs) != SSA_NAME)
+   {
+ tree base = get_base_address (lhs);
+ if (base
+ && (TREE_CODE (base) == VAR_DECL
+ || TREE_CODE (base) == RESULT_DECL)
+ && DECL_IGNORED_P (base)
+ && !TREE_STATIC (base)
+ && !DECL_EXTERNAL (base)
+ && (TREE_CODE (base) != VAR_DECL
+ || !DECL_HAS_VALUE_EXPR_P (base)))
+   DECL_NONSHAREABLE (base) = 1;
+   }
+
   /* Create new names for all the definitions created by COPY and
 add replacement mappings for each new name.  */
   FOR_EACH_SSA_DEF_OPERAND (def_p, copy, op_iter, SSA_OP_ALL_DEFS)
--- gcc/testsuite/gfortran.dg/pr49103.f90.jj2011-05-31 13:52:43.0 
+0200
+++ gcc/testsuite/gfortran.dg/pr49103.f90   2011-05-31 13:57:16.0 
+0200
@@ -0,0 +1,19 @@
+! PR fortran/49103
+! { dg-do run }
+  integer :: a(2), b(2), i, j
+  open (10, status='scratch')
+  do j = 1, 2
+a = (/ 0, 0 /)
+b = (/ 1, 1 /)
+do i = 1, 2
+  write (10, *) a
+  write (10, *) b
+end do
+  end do
+  rewind (10)
+  do i = 0, 7
+read (10, *) a
+if (any (a .ne. mod (i, 2))) call abort
+  end do
+  close (10)
+end


Jakub


Re: [gcc patch 1/3] cp-demangle.c: Make `options' a parameter

2011-06-13 Thread Jan Kratochvil
On Mon, 13 Jun 2011 20:06:21 +0200, Ian Lance Taylor wrote:
> The new options parameter should be the second parameter, not the last.

OK, I agree.


> This is OK with those changes.

Checked in.


Thanks,
Jan


http://gcc.gnu.org/viewcvs?view=revision&revision=174999

--- trunk/libiberty/ChangeLog   2011/06/13 19:05:04 174998
+++ trunk/libiberty/ChangeLog   2011/06/13 22:30:02 174999
@@ -1,3 +1,12 @@
+2011-06-13  Jan Kratochvil  
+
+   * cp-demangle.c (struct d_print_info): Remove field options.
+   (d_print_init): Remove parameter options.
+   (cplus_demangle_print_callback): Update all the callers.
+   (d_print_comp, d_print_mod_list, d_print_mod, d_print_function_type)
+   (d_print_array_type, d_print_expr_op, d_print_cast, d_print_subexpr):
+   Add parameter options, update all the callers.
+
 2011-04-20  Jim Meyering  
 
* cp-demint.c (cplus_demangle_v3_components): Remove useless
--- trunk/libiberty/cp-demangle.c   2011/06/13 19:05:04 174998
+++ trunk/libiberty/cp-demangle.c   2011/06/13 22:30:02 174999
@@ -278,8 +278,6 @@
 enum { D_PRINT_BUFFER_LENGTH = 256 };
 struct d_print_info
 {
-  /* The options passed to the demangler.  */
-  int options;
   /* Fixed-length allocated buffer for demangled data, flushed to the
  callback with a NUL termination once full.  */
   char buf[D_PRINT_BUFFER_LENGTH];
@@ -436,7 +434,7 @@
 d_growable_string_callback_adapter (const char *, size_t, void *);
 
 static void
-d_print_init (struct d_print_info *, int, demangle_callbackref, void *);
+d_print_init (struct d_print_info *, demangle_callbackref, void *);
 
 static inline void d_print_error (struct d_print_info *);
 
@@ -454,32 +452,32 @@
 static inline char d_last_char (struct d_print_info *);
 
 static void
-d_print_comp (struct d_print_info *, const struct demangle_component *);
+d_print_comp (struct d_print_info *, int, const struct demangle_component *);
 
 static void
 d_print_java_identifier (struct d_print_info *, const char *, int);
 
 static void
-d_print_mod_list (struct d_print_info *, struct d_print_mod *, int);
+d_print_mod_list (struct d_print_info *, int, struct d_print_mod *, int);
 
 static void
-d_print_mod (struct d_print_info *, const struct demangle_component *);
+d_print_mod (struct d_print_info *, int, const struct demangle_component *);
 
 static void
-d_print_function_type (struct d_print_info *,
+d_print_function_type (struct d_print_info *, int,
const struct demangle_component *,
struct d_print_mod *);
 
 static void
-d_print_array_type (struct d_print_info *,
+d_print_array_type (struct d_print_info *, int,
 const struct demangle_component *,
 struct d_print_mod *);
 
 static void
-d_print_expr_op (struct d_print_info *, const struct demangle_component *);
+d_print_expr_op (struct d_print_info *, int, const struct demangle_component 
*);
 
 static void
-d_print_cast (struct d_print_info *, const struct demangle_component *);
+d_print_cast (struct d_print_info *, int, const struct demangle_component *);
 
 static int d_demangle_callback (const char *, int,
 demangle_callbackref, void *);
@@ -3293,10 +3291,9 @@
 /* Initialize a print information structure.  */
 
 static void
-d_print_init (struct d_print_info *dpi, int options,
-  demangle_callbackref callback, void *opaque)
+d_print_init (struct d_print_info *dpi, demangle_callbackref callback,
+ void *opaque)
 {
-  dpi->options = options;
   dpi->len = 0;
   dpi->last_char = '\0';
   dpi->templates = NULL;
@@ -3392,9 +3389,9 @@
 {
   struct d_print_info dpi;
 
-  d_print_init (&dpi, options, callback, opaque);
+  d_print_init (&dpi, callback, opaque);
 
-  d_print_comp (&dpi, dc);
+  d_print_comp (&dpi, options, dc);
 
   d_print_flush (&dpi);
 
@@ -3537,7 +3534,7 @@
if needed.  */
 
 static void
-d_print_subexpr (struct d_print_info *dpi,
+d_print_subexpr (struct d_print_info *dpi, int options,
 const struct demangle_component *dc)
 {
   int simple = 0;
@@ -3546,7 +3543,7 @@
 simple = 1;
   if (!simple)
 d_append_char (dpi, '(');
-  d_print_comp (dpi, dc);
+  d_print_comp (dpi, options, dc);
   if (!simple)
 d_append_char (dpi, ')');
 }
@@ -3554,7 +3551,7 @@
 /* Subroutine to handle components.  */
 
 static void
-d_print_comp (struct d_print_info *dpi,
+d_print_comp (struct d_print_info *dpi, int options,
   const struct demangle_component *dc)
 {
   if (dc == NULL)
@@ -3568,7 +3565,7 @@
   switch (dc->type)
 {
 case DEMANGLE_COMPONENT_NAME:
-  if ((dpi->options & DMGL_JAVA) == 0)
+  if ((options & DMGL_JAVA) == 0)
d_append_buffer (dpi, dc->u.s_name.s, dc->u.s_name.len);
   else
d_print_java_identifier (dpi, dc->u.s_name.s, dc->u.s_name.len);
@@ -3576,12 +3573,12 @@
 
 case DEMANGLE_COMPONENT_QUAL_NAME:
 case DEMANGLE_COMPONENT_LOCAL_NAME:
-  

Re: [gcc patch 2/3] cp-demangle.c: New DMGL_RET_DROP

2011-06-13 Thread Jan Kratochvil
On Mon, 13 Jun 2011 20:08:40 +0200, Ian Lance Taylor wrote:
> On Thu, Jun 2, 2011 at 7:16 AM, Jan Kratochvil
>  wrote:
> >
> > include/
> > 2011-05-24  Jan Kratochvil  
> >
> >        * demangle.h (DMGL_RET_POSTFIX): Extend the comment.
> >        (DMGL_RET_DROP): New.
> >
> > libiberty/
> > 2011-05-24  Jan Kratochvil  
> >
> >        * cp-demangle.c (d_print_comp) : Do
> >        not pass DMGL_RET_POSTFIX or DMGL_RET_DROP.  Support DMGL_RET_DROP.
> >        * testsuite/demangle-expected: New testcases for --ret-drop.
> >        * testsuite/test-demangle.c: Document --ret-drop in a comment.
> >        (main): New variable ret_drop, fill it, call cplus_demangle with it.
> 
> This is OK.

Checked in:
http://gcc.gnu.org/viewcvs?view=revision&revision=175000


Thanks,
Jan


Re: [gcc patch 3/3] cp-demangle.c: Fix DMGL_RET_POSTFIX for inner func types

2011-06-13 Thread Jan Kratochvil
On Mon, 13 Jun 2011 20:09:57 +0200, Ian Lance Taylor wrote:
> On Thu, Jun 2, 2011 at 7:17 AM, Jan Kratochvil
>  wrote:
> >
> > libiberty/
> > 2011-05-24  Jan Kratochvil  
> >
> >        * cp-demangle.c (d_print_comp) :
> >        Suppress d_print_mod for DMGL_RET_POSTFIX.
> >        * testsuite/demangle-expected: New testcases for --ret-postfix.
> 
> This is OK.

Checked in:
http://gcc.gnu.org/viewcvs?view=revision&revision=175001


Thanks,
Jan


[PATCH, PR 49089] Don't split AVX256 unaligned loads by default on bdver1 and generic

2011-06-13 Thread Fang, Changpeng
Hi,

The patch ( http://gcc.gnu.org/ml/gcc-patches/2011-02/txt00059.txt ) which 
introduces splitting avx256 unaligned loads. 
However, we found that it causes significant regressions for cpu2006 ( 
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49089 ).

In this work, we introduce a tune option that sets splitting unaligned loads 
default only for such CPUs that such splitting
is beneficial.

The patch passed bootstrapping and regression tests on x86_64-unknown-linux-gnu 
system.

Is it OK to commit?

Thanks,

Changpeng From 415012803abf2cac95c067394504c55dd968f4f5 Mon Sep 17 00:00:00 2001
From: Changpeng Fang 
Date: Mon, 13 Jun 2011 13:13:32 -0700
Subject: [PATCH] pr49089: enable avx256 splitting unaligned load only when beneficial

	* config/i386/i386.h (ix86_tune_indices): Introduce
	  X86_TUNE_AVX256_SPLIT_UNALIGNED_LOAD_OPTIMAL.
	  (TARGET_AVX256_SPLIT_UNALIGNED_LOAD_OPTIMAL): New definition.

	* config/i386/i386.c (ix86_tune_features): Add entry for
	  X86_TUNE_AVX256_SPLIT_UNALIGNED_LOAD_OPTIMAL.
	  (ix86_option_override_internal): Enable avx256 unaligned load splitting
	  only when TARGET_AVX256_SPLIT_UNALIGNED_LOAD_OPTIMAL is set.
---
 gcc/config/i386/i386.c |9 +++--
 gcc/config/i386/i386.h |3 +++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7b266b9..d5f358f 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2088,7 +2088,11 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
   /* X86_SOFTARE_PREFETCHING_BENEFICIAL: Enable software prefetching
  at -O3.  For the moment, the prefetching seems badly tuned for Intel
  chips.  */
-  m_K6_GEODE | m_AMD_MULTIPLE
+  m_K6_GEODE | m_AMD_MULTIPLE,
+
+  /* X86_TUNE_AVX256_SPLIT_UNALIGNED_LOAD_OPTIMAL: Enable splitting 256-bit
+ unaligned load.  It hurts the performance on Bulldozer.  */
+  m_COREI7
 };
 
 /* Feature tests against the various architecture variations.  */
@@ -4194,7 +4198,8 @@ ix86_option_override_internal (bool main_args_p)
 	  if (flag_expensive_optimizations
 	  && !(target_flags_explicit & MASK_VZEROUPPER))
 	target_flags |= MASK_VZEROUPPER;
-	  if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
+	  if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD_OPTIMAL
+	  && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
 	target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
 	  if (!(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
 	target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 8badcbb..b2a1bc8 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -312,6 +312,7 @@ enum ix86_tune_indices {
   X86_TUNE_OPT_AGU,
   X86_TUNE_VECTORIZE_DOUBLE,
   X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL,
+  X86_TUNE_AVX256_SPLIT_UNALIGNED_LOAD_OPTIMAL,
 
   X86_TUNE_LAST
 };
@@ -410,6 +411,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 	ix86_tune_features[X86_TUNE_VECTORIZE_DOUBLE]
 #define TARGET_SOFTWARE_PREFETCHING_BENEFICIAL \
 	ix86_tune_features[X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL]
+#define TARGET_AVX256_SPLIT_UNALIGNED_LOAD_OPTIMAL \
+	ix86_tune_features[X86_TUNE_AVX256_SPLIT_UNALIGNED_LOAD_OPTIMAL]
 
 /* Feature tests against the various architecture variations.  */
 enum ix86_arch_indices {
-- 
1.7.0.4



Re: [PATCH: ARM] PR 45335 Use ldrd and strd to access two consecutive words

2011-06-13 Thread Ramana Radhakrishnan
This caused http://gcc.gnu.org/PR49398.

You might also consider putting the PR numbers in your commit log in
the future that these patches attempt to fix so that the PR's in
bugzilla get the associated commit message. Please add links to this
commit to the individual PRs this commit attempts to fix (PR45335 ,
PR40457).

cheers
Ramana


Re: [PATCH, ARM] Fix ABI for double-precision helpers on single-float-only CPUs

2011-06-13 Thread Ramana Radhakrishnan
>
> I'm re-testing the attached version.

Shouldn't this be backported to release branches - specifically 4.6
(and 4.5 since this is where TARGET_VFP_SINGLE was introduced) ?


cheers
Ramana


[PATCH] Docs: cfg.texi: Correct statement about CFG

2011-06-13 Thread 陳韋任
The patch correct statement about CFG. According to the comment on
the function gimple_expand_cfg in gcc/cfgexpand.c, the tree CFG now
is preserved and expanded to RTL representation.

2011-06-14  Wei-Ren Chen 

  * gcc/doc/cfg.texi: Correct statement about CFG.

Index: cfg.texi
===
--- cfg.texi(revision 175007)
+++ cfg.texi(working copy)
@@ -612,11 +612,10 @@
 code.

 Note that at present, the representation of control flow in the
-@code{tree} representation is discarded before expanding to RTL@.
-Long term the CFG should be maintained and ``expanded'' to the
-RTL representation along with the function @code{tree} itself.
+@code{tree} representation is maintained and ``expanded'' to the RTL
+representation. So the CFG simultaneously consist of basic blocks
+containing RTL and GIMPLE trees.

-
 @node Liveness information
 @section Liveness information
 @cindex Liveness representation

-- 
Wei-Ren Chen (陳韋任)
Computer Systems Lab, Institute of Information Science,
Academia Sinica, Taiwan (R.O.C.)
Tel:886-2-2788-3799 #1667


Re: [patch, libgfortran] PR48906 Wrong rounding results with -m32

2011-06-13 Thread jerry DeLisle

On 06/11/2011 12:23 AM, Thomas Henlich wrote:

I don't agree with this; with the patch we now output 10 significant
digits, whereas 9 is sufficient for a binary->ascii->binary roundtrip.
So please retain the "reduce d by one when E editing is used" thing
for list format and G0. This is just a side effect of using 1PGw.d
format for list format and G0 in order to avoid duplicating code, but
we don't need to follow this particular craziness that is due to how
the scale factor is specified in the standard.


I hadn't noticed this, but I agree with Janne.

It should be easy to implement:

After the switch between F and E editing, we just need to shift the
decimal point and decrement the exponent. No new rounding is required,
because we keep the number of significant digits.



OK, after a little bit of experimentation, I have arrived at the updated patch 
attached.


This has been regression tested and passes all test cases I am aware of.  I also 
have included a new test case gcc/testsuite/gfortran.dg/fmt_g.f90.


OK for trunk?

Jerry
Index: gcc/testsuite/gfortran.dg/fmt_g.f90
===
--- gcc/testsuite/gfortran.dg/fmt_g.f90	(revision 0)
+++ gcc/testsuite/gfortran.dg/fmt_g.f90	(revision 0)
@@ -0,0 +1,88 @@
+! { dg-do run }
+! PR48906 Wrong rounding results with -m32
+! This test case presents various corner cases bumped into while reworking
+! handling of G formatting of reals without using floating point operations.
+  implicit none
+  integer, parameter :: RT = 8
+  character(28) :: buf
+  write(buf, "(rc,f11.2,4x,'u.p.scale_factor;
   dtp->u.p.scale_factor = 1;
   set_fnode_default (dtp, &f, length);
-  write_float (dtp, &f, source , length, 1);
+  write_float (dtp, &f, source , length);
   dtp->u.p.scale_factor = org_scale;
 }
 
@@ -1487,19 +1487,11 @@ void
 write_real_g0 (st_parameter_dt *dtp, const char *source, int length, int d)
 {
   fnode f;
-  int comp_d; 
   set_fnode_default (dtp, &f, length);
   if (d > 0)
 f.u.real.d = d;
-
-  /* Compensate for extra digits when using scale factor, d is not
- specified, and the magnitude is such that E editing is used.  */
-  if (dtp->u.p.scale_factor > 0 && d == 0)
-comp_d = 1;
-  else
-comp_d = 0;
   dtp->u.p.g0_no_blanks = 1;
-  write_float (dtp, &f, source , length, comp_d);
+  write_float (dtp, &f, source , length);
   dtp->u.p.g0_no_blanks = 0;
 }
 
Index: libgfortran/io/write_float.def
===
--- libgfortran/io/write_float.def	(revision 174673)
+++ libgfortran/io/write_float.def	(working copy)
@@ -59,8 +59,22 @@ calculate_sign (st_parameter_dt *dtp, int negative
 }
 
 
-/* Output a real number according to its format which is FMT_G free.  */
+/* Output a real number according to its format.
 
+   What this does:
+
+	- Calculates the number of digits to emit before and after the
+	  decimal point.
+	- Performs the required rounding.
+	- Calculates the number of leading blanks to emit.
+	- Calculates the number of trailing blanks to emit.
+	- Calculates the exponent format if any.
+	- Determines if the sign should be displayed.
+	- Allocates the write buffer according to the width.
+	- Emits the formatted number according to the above.
+
+   buffer contains the digit string when we enter this function.  */
+
 static try
 output_float (st_parameter_dt *dtp, const fnode *f, char *buffer, size_t size, 
 	  int sign_bit, bool zero_flag, int ndigits, int edigits)
@@ -78,8 +92,10 @@ output_float (st_parameter_dt *dtp, const fnode *f
   int nafter;
   /* Number of zeros after the decimal point, whatever the precision.  */
   int nzero_real;
+  /* Flag for optional leading zero, if there is room.  */
   int leadzero;
-  int nblanks;
+  /* Number of leading and trailing blanks.  */
+  int lblanks, tblanks; 
   sign_t sign;
 
   ft = f->format;
@@ -87,7 +103,6 @@ output_float (st_parameter_dt *dtp, const fnode *f
   d = f->u.real.d;
   p = dtp->u.p.scale_factor;
 
-  rchar = '5';
   nzero_real = -1;
 
   /* We should always know the field width and precision.  */
@@ -142,19 +157,57 @@ output_float (st_parameter_dt *dtp, const fnode *f
   expchar = 0;
   break;
 
+case FMT_G:
+  nbefore = 0;
+  nzero = 0;
+  nafter = d;
+  expchar = 0;
+
+  if (e == d)
+	{
+	  if (e > 0)
+	{
+	  nbefore = e;
+	  nafter = d > nbefore ? d - nbefore : 0;
+	}
+	  else
+	expchar = 'E';
+	  break;
+	}
+  else if (e < d && e >= 0)
+	{
+	  nbefore = e;
+	  nafter = d > nbefore ? d - nbefore : 0;
+	  break;
+	}
+  else if (e >= -d && e < 0)
+	{
+	  if (f->u.real.e == -1)
+	nafter = d;
+	  else if (p == 1)
+	{
+	  p = 0;
+	  e--;
+	  nbefore++;
+	  nafter--;
+	  expchar = 'E';
+	  break;
+	}
+	}
+/* Fall through.  */
 case FMT_E:
 case FMT_D:
-  i = dtp->u.p.scale_factor;
   if (d <= 0 && p == 0)
 	{
 	  generate_error (&dtp->common, LIBERROR

Go patch committed: Lower builtin make

2011-06-13 Thread Ian Lance Taylor
This patch lowers calls to the builtin make function to runtime calls at
lowering time, rather than at backend conversion time.  This patch also
adjusts type descriptors to use kindNoPointers aka GO_NO_POINTERS as the
6g library does.  This lets the runtime memory allocation code quickly
check whether a particular type can contain pointers.  Bootstrapped and
ran Go testsuite on x86_64-unknown-linux-gnu.  Committed to mainline

Ian


2011-06-13  Ian Lance Taylor  

* Make-lang.in (go/expressions.o): Depend on $(GO_RUNTIME_H).


Index: gcc/go/Make-lang.in
===
--- gcc/go/Make-lang.in	(revision 174941)
+++ gcc/go/Make-lang.in	(working copy)
@@ -255,7 +255,7 @@ go/expressions.o: go/gofrontend/expressi
 	intl.h $(TREE_H) $(GIMPLE_H) tree-iterator.h convert.h $(REAL_H) \
 	realmpfr.h $(GO_C_H) $(GO_GOGO_H) $(GO_TYPES_H) \
 	go/gofrontend/export.h $(GO_IMPORT_H) $(GO_STATEMENTS_H) $(GO_LEX_H) \
-	go/gofrontend/backend.h $(GO_EXPRESSIONS_H)
+	$(GO_RUNTIME_H) go/gofrontend/backend.h $(GO_EXPRESSIONS_H)
 go/go.o: go/gofrontend/go.cc $(GO_SYSTEM_H) $(GO_C_H) $(GO_LEX_H) \
 	$(GO_PARSE_H) go/gofrontend/backend.h $(GO_GOGO_H)
 go/go-dump.o: go/gofrontend/go-dump.cc $(GO_SYSTEM_H) $(GO_C_H) \
Index: gcc/go/gofrontend/runtime.def
===
--- gcc/go/gofrontend/runtime.def	(revision 174810)
+++ gcc/go/gofrontend/runtime.def	(working copy)
@@ -65,8 +65,14 @@ DEF_GO_RUNTIME(STRING_TO_INT_ARRAY, "__g
 	   P1(STRING), R1(SLICE))
 
 
+// Make a slice.
+DEF_GO_RUNTIME(MAKESLICE1, "__go_make_slice1", P2(TYPE, UINTPTR), R1(SLICE))
+DEF_GO_RUNTIME(MAKESLICE2, "__go_make_slice2", P3(TYPE, UINTPTR, UINTPTR),
+	   R1(SLICE))
+
+
 // Make a map.
-DEF_GO_RUNTIME(NEW_MAP, "__go_new_map", P2(TYPE, UINTPTR), R1(MAP))
+DEF_GO_RUNTIME(MAKEMAP, "__go_new_map", P2(MAPDESCRIPTOR, UINTPTR), R1(MAP))
 
 // Build a map from a composite literal.
 DEF_GO_RUNTIME(CONSTRUCT_MAP, "__go_construct_map",
@@ -103,7 +109,7 @@ DEF_GO_RUNTIME(MAPITERNEXT, "runtime.map
 
 
 // Make a channel.
-DEF_GO_RUNTIME(NEW_CHANNEL, "__go_new_channel", P2(UINTPTR, UINTPTR), R1(CHAN))
+DEF_GO_RUNTIME(MAKECHAN, "__go_new_channel", P2(TYPE, UINTPTR), R1(CHAN))
 
 // Get the length of a channel (the number of unread values).
 DEF_GO_RUNTIME(CHAN_LEN, "__go_chan_len", P1(CHAN), R1(INT))
Index: gcc/go/gofrontend/types.h
===
--- gcc/go/gofrontend/types.h	(revision 174943)
+++ gcc/go/gofrontend/types.h	(working copy)
@@ -78,6 +78,8 @@ static const int RUNTIME_TYPE_KIND_STRIN
 static const int RUNTIME_TYPE_KIND_STRUCT = 25;
 static const int RUNTIME_TYPE_KIND_UNSAFE_POINTER = 26;
 
+static const int RUNTIME_TYPE_KIND_NO_POINTERS = (1 << 7);
+
 // To build the complete list of methods for a named type we need to
 // gather all methods from anonymous fields.  Those methods may
 // require an arbitrary set of indirections and field offsets.  There
@@ -811,13 +813,6 @@ class Type
   is_unexported_field_or_method(Gogo*, const Type*, const std::string&,
 std::vector*);
 
-  // This type was passed to the builtin function make.  ARGS are the
-  // arguments passed to make after the type; this may be NULL if
-  // there were none.  Issue any required errors.
-  bool
-  check_make_expression(Expression_list* args, source_location location)
-  { return this->do_check_make_expression(args, location); }
-
   // Convert the builtin named types.
   static void
   convert_builtin_named_types(Gogo*);
@@ -826,12 +821,6 @@ class Type
   Btype*
   get_backend(Gogo*);
 
-  // Return a tree for a make expression applied to this type.
-  tree
-  make_expression_tree(Translate_context* context, Expression_list* args,
-		   source_location location)
-  { return this->do_make_expression_tree(context, args, location); }
-
   // Build a type descriptor entry for this type.  Return a pointer to
   // it.  The location is the location which causes us to need the
   // entry.
@@ -878,16 +867,9 @@ class Type
   virtual unsigned int
   do_hash_for_method(Gogo*) const;
 
-  virtual bool
-  do_check_make_expression(Expression_list* args, source_location);
-
   virtual Btype*
   do_get_backend(Gogo*) = 0;
 
-  virtual tree
-  do_make_expression_tree(Translate_context*, Expression_list*,
-			  source_location);
-
   virtual Expression*
   do_type_descriptor(Gogo*, Named_type* name) = 0;
 
@@ -901,10 +883,6 @@ class Type
   virtual void
   do_export(Export*) const;
 
-  // Return whether an expression is an integer.
-  static bool
-  check_int_value(Expression*, const char*, source_location);
-
   // Return whether a method expects a pointer as the receiver.
   static bool
   method_expects_pointer(const Named_object*);
@@ -2083,16 +2061,9 @@ class Array_type : public Type
   unsigned int
   do_hash_for_method(Gogo*) const;
 
-  bool
-  do_check_make_expression(Expression_list*, source_location);
-
   Btype*
 

[google] Add intermediate text format for gcov (issue4595053)

2011-06-13 Thread Sharad Singhai
This patch adds an intermediate gcov text format which does not require
source code. This format can be used by lcov or other tools.

I have bootstrapped it on x86 and all tests pass. Okay for main?

Thanks,
Sharad

2011-06-13   Sharad Singhai  

Google Ref 3

* doc/gcov.texi: Document gcov intermediate format.
* gcov.c (get_gcov_file_intermediate_name): New function.
(output_intermediate_file): New function.
* testsuite/g++.dg/gcov/gcov-7.C: New test.


Index: doc/gcov.texi
===
--- doc/gcov.texi   (revision 174926)
+++ doc/gcov.texi   (working copy)
@@ -130,6 +130,7 @@
  [@option{-f}|@option{--function-summaries}]
  [@option{-o}|@option{--object-directory} @var{directory|file}] 
@var{sourcefiles}
  [@option{-u}|@option{--unconditional-branches}]
+ [@option{-i}|@option{--intermediate-format}]
  [@option{-d}|@option{--display-progress}]
 @c man end
 @c man begin SEEALSO
@@ -216,6 +217,12 @@
 @itemx --display-progress
 Display the progress on the standard output.
 
+@item -i
+@itemx --intermediate-format
+Output gcov file in an intermediate text format that can be used by
+@command{lcov} or other applications. It will output a single *.gcov file per
+*gcda file. No source code is required.
+
 @end table
 
 @command{gcov} should be run with the current directory the same as that
Index: gcov.c
===
--- gcov.c  (revision 174926)
+++ gcov.c  (working copy)
@@ -38,6 +38,7 @@
 #include "tm.h"
 #include "intl.h"
 #include "version.h"
+#include "demangle.h"
 
 #include 
 
@@ -310,6 +311,9 @@
 
 static int flag_display_progress = 0;
 
+/* Output *.gcov file in intermediate format used by 'lcov'.  */
+static int flag_intermediate_format = 0;
+
 /* For included files, make the gcov output file name include the name
of the input source file.  For example, if x.h is included in a.c,
then the output file name is a.c##x.h.gcov instead of x.h.gcov.  */
@@ -436,6 +440,11 @@
   fnotice (file, "  -o, --object-directory DIR|FILE Search for object files in 
DIR or called FILE\n");
   fnotice (file, "  -p, --preserve-pathsPreserve all pathname 
components\n");
   fnotice (file, "  -u, --unconditional-branchesShow unconditional branch 
counts too\n");
+  fnotice (file, "  -i, --intermediate-format   Output .gcov file in an 
intermediate text\n\
+format that can be used by 'lcov' or 
other\n\
+applications.  It will output a single\n\
+.gcov file per .gcda file.  No source 
file\n\
+is required.\n");
   fnotice (file, "  -d, --display-progress  Display progress 
information\n");
   fnotice (file, "\nFor bug reporting instructions, please see:\n%s.\n",
   bug_report_url);
@@ -472,6 +481,7 @@
   { "object-file",  required_argument, NULL, 'o' },
   { "unconditional-branches", no_argument, NULL, 'u' },
   { "display-progress", no_argument,   NULL, 'd' },
+  { "intermediate-format",  no_argument,   NULL, 'i' },
   { 0, 0, 0, 0 }
 };
 
@@ -482,7 +492,8 @@
 {
   int opt;
 
-  while ((opt = getopt_long (argc, argv, "abcdfhlno:puv", options, NULL)) != 
-1)
+  while ((opt = getopt_long (argc, argv, "abcdfhilno:puv", options, NULL)) !=
+ -1)
 {
   switch (opt)
{
@@ -516,6 +527,10 @@
case 'u':
  flag_unconditional = 1;
  break;
+   case 'i':
+  flag_intermediate_format = 1;
+  flag_gcov_file = 1;
+  break;
 case 'd':
   flag_display_progress = 1;
   break;
@@ -531,6 +546,109 @@
   return optind;
 }
 
+/* Get the name of the gcov file.  The return value must be free'd.
+
+   It appends the '.gcov' extension to the *basename* of the file.
+   The resulting file name will be in PWD.
+
+   e.g.,
+   input: foo.da,   output: foo.da.gcov
+   input: a/b/foo.cc,   output: foo.cc.gcov  */
+
+static char *
+get_gcov_file_intermediate_name (const char *file_name)
+{
+  const char *gcov = ".gcov";
+  char *result;
+  const char *cptr;
+
+  /* Find the 'basename'.  */
+  cptr = lbasename (file_name);
+
+  result = XNEWVEC(char, strlen (cptr) + strlen (gcov) + 1);
+  sprintf (result, "%s%s", cptr, gcov);
+
+  return result;
+}
+
+/* Output the result in intermediate format used by 'lcov'.
+
+This format contains a single file named 'foo.cc.gcov', with no source
+code included.
+
+SF:/home/.../foo.h
+DA:10,1
+DA:30,0
+DA:35,1
+SF:/home/.../bar.h
+DA:12,0
+DA:33,0
+DA:55,1
+SF:/home/.../foo.cc
+FN:30,
+FNDA:2,
+DA:42,0
+DA:53,1
+BA:55,1
+BA:55,2
+DA:95,1
+...
+
+The default format contains 3 separate files: 'foo.h.gcov', 'foo.cc.gcov',
+'bar.h.gcov', each with source code included.  */
+
+static void
+output_intermediate_file (FILE *gcov_file, so

Re: [PATCH, SMS 1/4] Fix calculation of row_rest_count

2011-06-13 Thread Revital Eres
Hello,

> Please add the following:
> o A clarification that rows_length is used only (as an optimization) to
> back off quickly from trying to schedule a node in a full row; that is, to
> avoid running through futile DFA state transitions.
> o An assert that ps->rows_length[i] equals the number of nodes in ps->rows
> [i] (e.g., in verify_partial_schedule(); and then recheck...).

Done. Besides the additions to address your comments I also added two
more bits to update rows_length field into rotate_partial_schedule ()
and ps_insert_empty_row () that were missing in the previous
implementation of the patch.

The patch was re-tested on top of the patch to fix violation of memory
dependence (http://gcc.gnu.org/ml/gcc-patches/2011-06/msg00960.html)
as follows: On ppc64-redhat-linux regtest as well as bootstrap with
SMS flags, enabling SMS also on loops with stage count 1.  Regtested
on SPU.
On arm-linux-gnueabi bootstrap c language with SMS
flags enabling SMS also on loops with stage count 1 and currently
regtseted on c,c++.

OK for mainline with these changes once regtests on arm-linux-gnueabi completes?

Thanks,
Revital


* modulo-sched.c (struct ps_insn): Remove row_rest_count field.
(struct partial_schedule): Add rows_length field.
(verify_partial_schedule): Check rows_length.
(ps_insert_empty_row): Handle rows_length.
(create_partial_schedule): Likewise.
(free_partial_schedule): Likewise.
(reset_partial_schedule): Likewise.
(create_ps_insn): Remove rest_count argument.
(remove_node_from_ps): Update rows_length.
(add_node_to_ps): Update rows_length and call create_ps_insn
without passing row_rest_count.
(rotate_partial_schedule): Update rows_length.
Index: modulo-sched.c
===
--- modulo-sched.c  (revision 173786)
+++ modulo-sched.c  (working copy)
@@ -134,8 +134,6 @@ struct ps_insn
   ps_insn_ptr next_in_row,
  prev_in_row;
 
-  /* The number of nodes in the same row that come after this node.  */
-  int row_rest_count;
 };
 
 /* Holds the partial schedule as an array of II rows.  Each entry of the
@@ -149,6 +147,12 @@ struct partial_schedule
   /* rows[i] points to linked list of insns scheduled in row i (0<=iii;
   int new_ii = ii + 1;
   int row;
+  int *rows_length_new;
 
   verify_partial_schedule (ps, sched_nodes);
 
@@ -1922,9 +1927,11 @@ ps_insert_empty_row (partial_schedule_pt
   rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
 
   rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
+  rows_length_new = (int *) xcalloc (new_ii, sizeof (int));
   for (row = 0; row < split_row; row++)
 {
   rows_new[row] = ps->rows[row];
+  rows_length_new[row] = ps->rows_length[row];
   ps->rows[row] = NULL;
   for (crr_insn = rows_new[row];
   crr_insn; crr_insn = crr_insn->next_in_row)
@@ -1945,6 +1952,7 @@ ps_insert_empty_row (partial_schedule_pt
   for (row = split_row; row < ii; row++)
 {
   rows_new[row + 1] = ps->rows[row];
+  rows_length_new[row + 1] = ps->rows_length[row];
   ps->rows[row] = NULL;
   for (crr_insn = rows_new[row + 1];
   crr_insn; crr_insn = crr_insn->next_in_row)
@@ -1966,6 +1974,8 @@ ps_insert_empty_row (partial_schedule_pt
 + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
   free (ps->rows);
   ps->rows = rows_new;
+  free (ps->rows_length);
+  ps->rows_length = rows_length_new;
   ps->ii = new_ii;
   gcc_assert (ps->min_cycle >= 0);
 
@@ -2041,16 +2051,23 @@ verify_partial_schedule (partial_schedul
   ps_insn_ptr crr_insn;
 
   for (row = 0; row < ps->ii; row++)
-for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
-  {
-   ddg_node_ptr u = crr_insn->node;
-
-   gcc_assert (TEST_BIT (sched_nodes, u->cuid));
-   /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
-  popcount (sched_nodes) == number of insns in ps.  */
-   gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
-   gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
-  }
+{
+  int length = 0;
+  
+  for (crr_insn = ps->rows[row]; crr_insn; crr_insn = 
crr_insn->next_in_row)
+   {
+ ddg_node_ptr u = crr_insn->node;
+ 
+ length++;
+ gcc_assert (TEST_BIT (sched_nodes, u->cuid));
+ /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
+popcount (sched_nodes) == number of insns in ps.  */
+ gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
+ gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
+   }
+  
+  gcc_assert (ps->rows_length[row] == length);
+}
 }
 
 
@@ -2456,6 +2473,7 @@ create_partial_schedule (int ii, ddg_ptr
 {
   partial_schedule_ptr ps = XNEW (struct partial_schedule);
   ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
+  ps->rows_length = (int *) xcalloc (ii, sizeof (int