[patch] Use coretypes types in headers

2012-07-11 Thread Steven Bosscher
Hello,

With coretypes included everywhere, there's no need to use the
underlying "struct blah *" method anymore in almost all places.

Bootstrapped&tested on powerpc64-unknown-linux-gnu. OK?

Ciao!
Steven

* basic-block.h (struct edge_def): Use basic_block instead of
basic_block_def *.
* cfgloop.h (struct loop_exit, struct loop): Likewise.
* gengenrtl.c (type_from_format): Likewise.  Also for 'tree'
instead of union tree_node *.
* rtl.h (union rtunion_def, emit_insn_before_noloc,
emit_insn_after_noloc, add_insn_before, add_insn_after,
debug_bb_slim): Likewise.
* tree-inline.h (struct copy_body_data): Likewise.
* sched-rgn.c (dump_region_dot): Likewise.
* gimple.h (struct gimple_statement_base, gimple_set_bb,
gsi_move_to_bb_end): Likewise.
* sched-vis.c (debug_bb_slim): Likewise.
(debug_bb_n_slim): Likewise.
* config/mn10300/mn10300.c (mn10300_insert_setlb_lcc): Likewise.
(mn10300_block_contains_call): Likewise.

Index: basic-block.h
===
--- basic-block.h   (revision 189365)
+++ basic-block.h   (working copy)
@@ -35,8 +35,8 @@ typedef HOST_WIDEST_INT gcov_type;
 /* Control flow edge information.  */
 struct GTY(()) edge_def {
   /* The two blocks at the ends of the edge.  */
-  struct basic_block_def *src;
-  struct basic_block_def *dest;
+  basic_block src;
+  basic_block dest;

   /* Instructions queued on the edge.  */
   union edge_def_insns {
@@ -161,8 +161,8 @@ struct GTY((chain_next ("%h.next_bb"), c
   struct et_node * GTY ((skip (""))) dom[2];

   /* Previous and next blocks in the chain.  */
-  struct basic_block_def *prev_bb;
-  struct basic_block_def *next_bb;
+  basic_block prev_bb;
+  basic_block next_bb;

   union basic_block_il_dependent {
   struct gimple_bb_info GTY ((tag ("0"))) gimple;
Index: cfgloop.h
===
--- cfgloop.h   (revision 189365)
+++ cfgloop.h   (working copy)
@@ -80,7 +80,7 @@ struct GTY ((chain_next ("%h.next"))) nb

 struct GTY (()) loop_exit {
   /* The exit edge.  */
-  struct edge_def *e;
+  edge e;

   /* Previous and next exit in the list of the exits of the loop.  */
   struct loop_exit *prev;
@@ -114,10 +114,10 @@ struct GTY ((chain_next ("%h.next"))) lo
   unsigned ninsns;

   /* Basic block of loop header.  */
-  struct basic_block_def *header;
+  basic_block header;

   /* Basic block of loop latch.  */
-  struct basic_block_def *latch;
+  basic_block latch;

   /* For loop unrolling/peeling decision.  */
   struct lpt_decision lpt_decision;
Index: gengenrtl.c
===
--- gengenrtl.c (revision 189365)
+++ gengenrtl.c (working copy)
@@ -64,9 +64,9 @@ type_from_format (int c)
 case 'E':
   return "rtvec ";
 case 't':
-  return "union tree_node *";  /* tree - typedef not available */
+  return "tree ";
 case 'B':
-  return "struct basic_block_def *";  /* basic block - typedef not available */
+  return "basic_block ";
 default:
   gcc_unreachable ();
 }
Index: rtl.h
===
--- rtl.h   (revision 189365)
+++ rtl.h   (working copy)
@@ -196,7 +196,7 @@ union rtunion_def
   addr_diff_vec_flags rt_addr_diff_vec_flags;
   struct cselib_val_struct *rt_cselib;
   tree rt_tree;
-  struct basic_block_def *rt_bb;
+  basic_block rt_bb;
   mem_attrs *rt_mem;
   reg_attrs *rt_reg;
   struct constant_descriptor_rtx *rt_constant;
@@ -1735,7 +1735,7 @@ extern rtx assign_temp (tree, int, int);

 /* In emit-rtl.c */
 extern rtx emit_insn_before (rtx, rtx);
-extern rtx emit_insn_before_noloc (rtx, rtx, struct basic_block_def *);
+extern rtx emit_insn_before_noloc (rtx, rtx, basic_block);
 extern rtx emit_insn_before_setloc (rtx, rtx, int);
 extern rtx emit_jump_insn_before (rtx, rtx);
 extern rtx emit_jump_insn_before_noloc (rtx, rtx);
@@ -1750,7 +1750,7 @@ extern rtx emit_barrier_before (rtx);
 extern rtx emit_label_before (rtx, rtx);
 extern rtx emit_note_before (enum insn_note, rtx);
 extern rtx emit_insn_after (rtx, rtx);
-extern rtx emit_insn_after_noloc (rtx, rtx, struct basic_block_def *);
+extern rtx emit_insn_after_noloc (rtx, rtx, basic_block);
 extern rtx emit_insn_after_setloc (rtx, rtx, int);
 extern rtx emit_jump_insn_after (rtx, rtx);
 extern rtx emit_jump_insn_after_noloc (rtx, rtx);
@@ -2435,8 +2435,8 @@ extern void unshare_all_rtl_in_chain (rt
 extern void verify_rtl_sharing (void);
 extern void link_cc0_insns (rtx);
 extern void add_insn (rtx);
-extern void add_insn_before (rtx, rtx, struct basic_block_def *);
-extern void add_insn_after (rtx, rtx, struct basic_block_def *);
+extern void add_insn_before (rtx, rtx, basic_block);
+extern void add_insn_after (rtx, rtx, basic_block);
 extern void remove_insn (rtx);
 extern rtx emit (rtx);
 extern voi

Re: RFA: Revert revision 164552

2012-07-11 Thread H.J. Lu
On Wed, Dec 1, 2010 at 1:32 AM, Bernd Schmidt  wrote:
> On 11/03/2010 04:02 PM, Diego Novillo wrote:
>> On Wed, Nov 3, 2010 at 10:02, Rainer Orth  
>> wrote:
>>> H.J.,
>>>
>>>> Revision 164552:
>>>>
>>>> http://gcc.gnu.org/ml/gcc-cvs/2010-09/msg00849.html
>>>>
>>>> which fixes:
>>>>
>>>> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=44374
>>>>
>>>> a mixed optimization bug, but caused many failures:
>>>>
>>>> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46257
>>>>
>>>> including bootstrap failures on x86. It was reported more than a month ago
>>>> and nothing has changed.  Should it be reverted for now?
>>>
>>> I think it should: it probably also caused PR bootstrap/46018 and
>>> certainly PR rtl-optimization/46114, where Bernd indicated that he would
>>> be away for several weeks.
>>
>> I agree.  Let's revert the patch.  Bernd should be able to figure out
>> a fix after he gets back.
>
> Now that there are no more vacations and other problems to stop me from
> fixing it, I propose that the patch be reapplied in the form below.
>
> Fixes:
> PR46114: Reported fixed by the patch in PR46238, which is included in
> the patch below.
> PR45816, PR45801: Needed a debug-insns fix from aoliva, which has been
> committed in the meantime.
> PR45865: Fixed by testing for NOTE_INSN_EPILOGUE_BEG in two places so as
> to not merge epilogues.
> PR46018: Doesn't look like the original failure was caused by my patch.
> I'm guessing the PR46114 fix is the solution here as well (as Rainer
> reported no problems on Solaris with that fix applied).
>
> I've also made some changes to preserve an unrelated ifcvt fix from Eric
> that has been applied in the same area in the meantime.
>
> Bootstrapped and regression tested on i686-linux, all languages. Rainer,
> it would be helpful if you could do a Solaris test.
>

This caused:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53908


-- 
H.J.


[patch] A few more header cleanups

2012-07-11 Thread Steven Bosscher
Hello,

This is another round of small header include cleanups.

Note: I'm not going through this work completely at random, but it's
sometimes hard to keep an overview of all the changes which is why I'm
sending them in bits. For example, moving the can_move_by_pieces
prototype really doesn't improve things at the surface, but it helps
expose what is needed by what, i.e. define the interfaces.
Longer-term, it should be easier to move/regroup code once the
interfaces are clear...

Bootstrapped&tested on x86_64-unknown-linux-gnu, and built
cross-compilers (cc1) to sparc-linux and ia64-linux. OK for trunk?

Ciao!
Steven


gcc/
* expr.h (can_move_by_pieces): Move prototype from here ...
* tree.h (can_move_by_pieces): ... to here.
* optabs.h (set_widening_optab_handler): Use XCNEW.
* gimplify.c: Do not include expr.h.

* toplev.c: Do not include dwarf2out.h.
* config/ia64/ia64.c: Likewise.
* config/sparc/sparc.c: Likewise.

c-family/
* c-family/c-gimplify.c: Do not include basic-block.h.
* c-family/c-common.c: Do not include libfuncs.h.

cp/
* cp/method.c: Do not include tree-pass.h.

fortran/
* fortran/trans.c: Do not include defaults.h.
* fortran/trans-intrinsic.c: Likewise.

java/
* java/decl.c: Do not include libfuncs.h.
* class.c: Do not include defaults.h.
* jvgenmain.c: Likewise.
* mangle.c: Likewise.

Index: expr.h
===
--- expr.h  (revision 189365)
+++ expr.h  (working copy)
@@ -369,11 +369,6 @@ rtx set_storage_via_libcall (rtx, rtx, r
 extern bool set_storage_via_setmem (rtx, rtx, rtx, unsigned int,
unsigned int, HOST_WIDE_INT);

-/* Determine whether the LEN bytes can be moved by using several move
-   instructions.  Return nonzero if a call to move_by_pieces should
-   succeed.  */
-extern int can_move_by_pieces (unsigned HOST_WIDE_INT, unsigned int);
-
 extern unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT,
 unsigned int,
 unsigned int);
Index: tree.h
===
--- tree.h  (revision 189365)
+++ tree.h  (working copy)
@@ -5886,6 +5886,12 @@ extern void fini_object_sizes (void);
 extern unsigned HOST_WIDE_INT compute_builtin_object_size (tree, int);

 /* In expr.c.  */
+
+/* Determine whether the LEN bytes can be moved by using several move
+   instructions.  Return nonzero if a call to move_by_pieces should
+   succeed.  */
+extern int can_move_by_pieces (unsigned HOST_WIDE_INT, unsigned int);
+
 extern unsigned HOST_WIDE_INT highest_pow2_factor (const_tree);
 extern tree build_personality_function (const char *);

Index: optabs.h
===
--- optabs.h(revision 189366)
+++ optabs.h(working copy)
@@ -1051,8 +1051,7 @@ set_widening_optab_handler (optab op, en
   else
 {
   if (op->widening == NULL)
-   op->widening = (struct widening_optab_handlers *)
- xcalloc (1, sizeof (struct widening_optab_handlers));
+   op->widening = XCNEW (struct widening_optab_handlers);

   op->widening->handlers[(int) to_mode][(int) from_mode].insn_code = code;
 }
Index: gimplify.c
===
--- gimplify.c  (revision 189365)
+++ gimplify.c  (working copy)
@@ -47,8 +47,6 @@ along with GCC; see the file COPYING3.
 #include "tree-pass.h"

 #include "langhooks-def.h" /* FIXME: for lhd_set_decl_assembler_name.  */
-#include "expr.h"  /* FIXME: for can_move_by_pieces
-  and STACK_CHECK_MAX_VAR_SIZE.  */

 enum gimplify_omp_var_data
 {
@@ -3970,7 +3968,7 @@ gimplify_init_constructor (tree *expr_p,
walk_tree (&DECL_INITIAL (object), force_labels_r, NULL, NULL);

/* ??? C++ doesn't automatically append a . to the
-  assembler name, and even when it does, it looks a FE private
+  assembler name, and even when it does, it looks at FE private
   data structures to figure out what that number should be,
   which are not set for this variable.  I suppose this is
   important for local statics for inline functions, which aren't
Index: toplev.c
===
--- toplev.c(revision 189365)
+++ toplev.c(working copy)
@@ -76,10 +76,6 @@ along with GCC; see the file COPYING3.
 #include "tree-ssa-alias.h"
 #include "plugin.h"

-#if defined (DWARF2_UNWIND_INFO) || defined (DWARF2_DEBUGGING_INFO)
-#include "dwarf2out.h"
-#endif
-
 #if defined(DBX_DEBUGGING_INFO) || defined(XCOFF_DEBUGGING_INFO)
 #include "dbxout.h"
 #endif
Index: cgraph.c

Re: [Patch, ARM] Fix PR53859: ICE on armv7e-m

2012-07-11 Thread Ramana Radhakrishnan
On 10 July 2012 13:20, Greta Yorsh  wrote:
> New RTL patterns generated for epilogues with RETURN (trunk r188742) are not
> recognized by the pattern matching code in arm_early_load_addr_dep, which is
> used for insn latency calculation when tuning for cortex-m4. It causes an
> ICE when tuning for armv7e-m or cortex-m4:
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53859.
>
> The obvious fix is to detect RETURN pattern in arm_early_load_addr_dep.
>
> No regression on qemu.
>
> Ok for trunk?

Ok .

Thanks,
Ramana

>
> Thanks,
> Greta
>
> ChangeLog
>
> 2012-07-10  Greta Yorsh  
>
> gcc/
> PR target/53859
> * config/arm/arm.c (arm_early_load_addr_dep): Handle new
> epilogue patterns.
>
> gcc/testsuite
>
> PR target/53859
> * gcc.target/arm/pr53859.c: New test.


Re: [PATCH] Fix breakage for m68k-linux introduced by 4a020a8 / r189359

2012-07-11 Thread Andreas Schwab
This is already fixed.

Andreas.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."


Re: [patch] Use coretypes types in headers

2012-07-11 Thread Richard Guenther
On Wed, Jul 11, 2012 at 9:31 AM, Steven Bosscher  wrote:
> Hello,
>
> With coretypes included everywhere, there's no need to use the
> underlying "struct blah *" method anymore in almost all places.
>
> Bootstrapped&tested on powerpc64-unknown-linux-gnu. OK?

Ok!

Thanks,
Richard.

> Ciao!
> Steven
>
> * basic-block.h (struct edge_def): Use basic_block instead of
> basic_block_def *.
> * cfgloop.h (struct loop_exit, struct loop): Likewise.
> * gengenrtl.c (type_from_format): Likewise.  Also for 'tree'
> instead of union tree_node *.
> * rtl.h (union rtunion_def, emit_insn_before_noloc,
> emit_insn_after_noloc, add_insn_before, add_insn_after,
> debug_bb_slim): Likewise.
> * tree-inline.h (struct copy_body_data): Likewise.
> * sched-rgn.c (dump_region_dot): Likewise.
> * gimple.h (struct gimple_statement_base, gimple_set_bb,
> gsi_move_to_bb_end): Likewise.
> * sched-vis.c (debug_bb_slim): Likewise.
> (debug_bb_n_slim): Likewise.
> * config/mn10300/mn10300.c (mn10300_insert_setlb_lcc): Likewise.
> (mn10300_block_contains_call):
>
> Index: basic-block.h
> ===
> --- basic-block.h   (revision 189365)
> +++ basic-block.h   (working copy)
> @@ -35,8 +35,8 @@ typedef HOST_WIDEST_INT gcov_type;
>  /* Control flow edge information.  */
>  struct GTY(()) edge_def {
>/* The two blocks at the ends of the edge.  */
> -  struct basic_block_def *src;
> -  struct basic_block_def *dest;
> +  basic_block src;
> +  basic_block dest;
>
>/* Instructions queued on the edge.  */
>union edge_def_insns {
> @@ -161,8 +161,8 @@ struct GTY((chain_next ("%h.next_bb"), c
>struct et_node * GTY ((skip (""))) dom[2];
>
>/* Previous and next blocks in the chain.  */
> -  struct basic_block_def *prev_bb;
> -  struct basic_block_def *next_bb;
> +  basic_block prev_bb;
> +  basic_block next_bb;
>
>union basic_block_il_dependent {
>struct gimple_bb_info GTY ((tag ("0"))) gimple;
> Index: cfgloop.h
> ===
> --- cfgloop.h   (revision 189365)
> +++ cfgloop.h   (working copy)
> @@ -80,7 +80,7 @@ struct GTY ((chain_next ("%h.next"))) nb
>
>  struct GTY (()) loop_exit {
>/* The exit edge.  */
> -  struct edge_def *e;
> +  edge e;
>
>/* Previous and next exit in the list of the exits of the loop.  */
>struct loop_exit *prev;
> @@ -114,10 +114,10 @@ struct GTY ((chain_next ("%h.next"))) lo
>unsigned ninsns;
>
>/* Basic block of loop header.  */
> -  struct basic_block_def *header;
> +  basic_block header;
>
>/* Basic block of loop latch.  */
> -  struct basic_block_def *latch;
> +  basic_block latch;
>
>/* For loop unrolling/peeling decision.  */
>struct lpt_decision lpt_decision;
> Index: gengenrtl.c
> ===
> --- gengenrtl.c (revision 189365)
> +++ gengenrtl.c (working copy)
> @@ -64,9 +64,9 @@ type_from_format (int c)
>  case 'E':
>return "rtvec ";
>  case 't':
> -  return "union tree_node *";  /* tree - typedef not available */
> +  return "tree ";
>  case 'B':
> -  return "struct basic_block_def *";  /* basic block - typedef
> not available */
> +  return "basic_block ";
>  default:
>gcc_unreachable ();
>  }
> Index: rtl.h
> ===
> --- rtl.h   (revision 189365)
> +++ rtl.h   (working copy)
> @@ -196,7 +196,7 @@ union rtunion_def
>addr_diff_vec_flags rt_addr_diff_vec_flags;
>struct cselib_val_struct *rt_cselib;
>tree rt_tree;
> -  struct basic_block_def *rt_bb;
> +  basic_block rt_bb;
>mem_attrs *rt_mem;
>reg_attrs *rt_reg;
>struct constant_descriptor_rtx *rt_constant;
> @@ -1735,7 +1735,7 @@ extern rtx assign_temp (tree, int, int);
>
>  /* In emit-rtl.c */
>  extern rtx emit_insn_before (rtx, rtx);
> -extern rtx emit_insn_before_noloc (rtx, rtx, struct basic_block_def *);
> +extern rtx emit_insn_before_noloc (rtx, rtx, basic_block);
>  extern rtx emit_insn_before_setloc (rtx, rtx, int);
>  extern rtx emit_jump_insn_before (rtx, rtx);
>  extern rtx emit_jump_insn_before_noloc (rtx, rtx);
> @@ -1750,7 +1750,7 @@ extern rtx emit_barrier_before (rtx);
>  extern rtx emit_label_before (rtx, rtx);
>  extern rtx emit_note_before (enum insn_note, rtx);
>  extern rtx emit_insn_after (rtx, rtx);
> -extern rtx emit_insn_after_noloc (rtx, rtx, struct basic_block_def *);
> +extern rtx emit_insn_after_noloc (rtx, rtx, basic_block);
>  extern rtx emit_insn_after_setloc (rtx, rtx, int);
>  extern rtx emit_jump_insn_after (rtx, rtx);
>  extern rtx emit_jump_insn_after_noloc (rtx, rtx);
> @@ -2435,8 +2435,8 @@ extern void unshare_all_rtl_in_chain (rt
>  extern void verify_rtl_sharing (void);
>  extern void link_cc0_insns (rtx);
>  extern v

Re: [patch] A few more header cleanups

2012-07-11 Thread Richard Guenther
On Wed, Jul 11, 2012 at 9:36 AM, Steven Bosscher  wrote:
> Hello,
>
> This is another round of small header include cleanups.
>
> Note: I'm not going through this work completely at random, but it's
> sometimes hard to keep an overview of all the changes which is why I'm
> sending them in bits. For example, moving the can_move_by_pieces
> prototype really doesn't improve things at the surface, but it helps
> expose what is needed by what, i.e. define the interfaces.
> Longer-term, it should be easier to move/regroup code once the
> interfaces are clear...
>
> Bootstrapped&tested on x86_64-unknown-linux-gnu, and build
> cross-compilers (cc1) to sparc-linux and ia64-linux. OK for trunk?

Ok with adjusting dependences in Makefile.in (if required).

Thanks,
Richard.

> Ciao!
> Steven
>
>
> gcc/
> * expr.h (can_move_by_pieces): Move prototype from here ...
> * tree.h (can_move_by_pieces): ... to here.
> * optabs.h (set_widening_optab_handler): Use XCNEW.
> * gimplify.c: Do not include expr.h.
>
> * toplev.c: Do not include dwarf2out.h.
> * config/ia64/ia64.c: Likewise.
> * config/sparc/sparc.c: Likewise.
>
> c-family/
> * c-family/c-gimplify.c: Do not include basic-block.h.
> * c-family/c-common.c: Do not include libfuncs.h.
>
> cp/
> * cp/method.c: Do not include tree-pass.h.
>
> fortran/
> * fortran/trans.c: Do not include defaults.h.
> * fortran/trans-intrinsic.c: Likewise.
>
> java/
> * java/decl.c: Do not include libfuncs.h.
> * class.c: Do not include defaults.h.
> * jvgenmain.c: Likewise.
> * mangle.c: Likewise.
>
> Index: expr.h
> ===
> --- expr.h  (revision 189365)
> +++ expr.h  (working copy)
> @@ -369,11 +369,6 @@ rtx set_storage_via_libcall (rtx, rtx, r
>  extern bool set_storage_via_setmem (rtx, rtx, rtx, unsigned int,
> unsigned int, HOST_WIDE_INT);
>
> -/* Determine whether the LEN bytes can be moved by using several move
> -   instructions.  Return nonzero if a call to move_by_pieces should
> -   succeed.  */
> -extern int can_move_by_pieces (unsigned HOST_WIDE_INT, unsigned int);
> -
>  extern unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT,
>  unsigned int,
>  unsigned int);
> Index: tree.h
> ===
> --- tree.h  (revision 189365)
> +++ tree.h  (working copy)
> @@ -5886,6 +5886,12 @@ extern void fini_object_sizes (void);
>  extern unsigned HOST_WIDE_INT compute_builtin_object_size (tree, int);
>
>  /* In expr.c.  */
> +
> +/* Determine whether the LEN bytes can be moved by using several move
> +   instructions.  Return nonzero if a call to move_by_pieces should
> +   succeed.  */
> +extern int can_move_by_pieces (unsigned HOST_WIDE_INT, unsigned int);
> +
>  extern unsigned HOST_WIDE_INT highest_pow2_factor (const_tree);
>  extern tree build_personality_function (const char *);
>
> Index: optabs.h
> ===
> --- optabs.h(revision 189366)
> +++ optabs.h(working copy)
> @@ -1051,8 +1051,7 @@ set_widening_optab_handler (optab op, en
>else
>  {
>if (op->widening == NULL)
> -   op->widening = (struct widening_optab_handlers *)
> - xcalloc (1, sizeof (struct widening_optab_handlers));
> +   op->widening = XCNEW (struct widening_optab_handlers);
>
>op->widening->handlers[(int) to_mode][(int) from_mode].insn_code = 
> code;
>  }
> Index: gimplify.c
> ===
> --- gimplify.c  (revision 189365)
> +++ gimplify.c  (working copy)
> @@ -47,8 +47,6 @@ along with GCC; see the file COPYING3.
>  #include "tree-pass.h"
>
>  #include "langhooks-def.h" /* FIXME: for lhd_set_decl_assembler_name.  */
> -#include "expr.h"  /* FIXME: for can_move_by_pieces
> -  and STACK_CHECK_MAX_VAR_SIZE.  */
>
>  enum gimplify_omp_var_data
>  {
> @@ -3970,7 +3968,7 @@ gimplify_init_constructor (tree *expr_p,
> walk_tree (&DECL_INITIAL (object), force_labels_r, NULL, NULL);
>
> /* ??? C++ doesn't automatically append a . to the
> -  assembler name, and even when it does, it looks a FE private
> +  assembler name, and even when it does, it looks at FE private
>data structures to figure out what that number should be,
>which are not set for this variable.  I suppose this is
>important for local statics for inline functions, which aren't
> Index: toplev.c
> ===
> --- toplev.c(revision 189365)
> +++ toplev.c(working copy)
> @@ -

[patch] Add ATTRIBUTE_MEMALLOC to a few malloc functions in GCC

2012-07-11 Thread Steven Bosscher
Hello,

One for under the "eat your own dog food" header.

Bootstrapped and tested on powerpc64-unknown-linux-gnu.
cc1 is a few bytes smaller. I expect the benefits to be more
noticeable with an LTO-enabled bootstrap, but I haven't figured out
yet how to do that :-)

OK for trunk?

Ciao!
Steven

* ggc.h (ggc_internal_alloc_stat, ggc_alloc_typed_stat,
ggc_internal_cleared_alloc_stat, ggc_cleared_alloc_htab_ignore_args,
ggc_cleared_alloc_ptr_array_two_args, ggc_splay_alloc): Add
ATTRIBUTE_MALLOC.
* alloc-pool.h (pool_alloc): Likewise.


ATTRIBUTE_MALLOC.diff
Description: Binary data


[patch] Remove unused java/expr.c:force_evaluation_order

2012-07-11 Thread Steven Bosscher
Hello,

Remove force_evaluation_order because the only caller is
force_evaluation_order itself.
Will commit after a few days, if no-one objects.

Ciao!
Steven

java/
* java-tree.h (force_evaluation_order): Remove prototype.
* expr.c (force_evaluation_order): Remove unused function.

Index: java-tree.h
===
--- java-tree.h (revision 189423)
+++ java-tree.h (working copy)
@@ -1094,7 +1094,6 @@ extern int merge_type_state (tree);
 extern int push_type_0 (tree);
 extern void push_type (tree);
 extern void add_interface (tree, tree);
-extern tree force_evaluation_order (tree);
 extern tree java_create_object (tree);
 extern int verify_constant_pool (struct JCF *);
 extern void start_java_method (tree);
Index: expr.c
===
--- expr.c  (revision 189423)
+++ expr.c  (working copy)
@@ -3689,82 +3689,6 @@ maybe_adjust_start_pc (struct JCF *jcf,
   return start_pc;
 }

-/* Force the (direct) sub-operands of NODE to be evaluated in left-to-right
-   order, as specified by Java Language Specification.
-
-   The problem is that while expand_expr will evaluate its sub-operands in
-   left-to-right order, for variables it will just return an rtx (i.e.
-   an lvalue) for the variable (rather than an rvalue).  So it is possible
-   that a later sub-operand will change the register, and when the
-   actual operation is done, it will use the new value, when it should
-   have used the original value.
-
-   We fix this by using save_expr.  This forces the sub-operand to be
-   copied into a fresh virtual register,
-
-   For method invocation, we modify the arguments so that a
-   left-to-right order evaluation is performed. Saved expressions
-   will, in CALL_EXPR order, be reused when the call will be expanded.
-
-   We also promote outgoing args if needed.  */
-
-tree
-force_evaluation_order (tree node)
-{
-  if (flag_syntax_only)
-return node;
-  if (TREE_CODE (node) == CALL_EXPR
-  || (TREE_CODE (node) == COMPOUND_EXPR
- && TREE_CODE (TREE_OPERAND (node, 0)) == CALL_EXPR
- && TREE_CODE (TREE_OPERAND (node, 1)) == SAVE_EXPR))
-{
-  tree call, cmp;
-  int i, nargs;
-
-  /* Account for wrapped around ctors.  */
-  if (TREE_CODE (node) == COMPOUND_EXPR)
-call = TREE_OPERAND (node, 0);
-  else
-   call = node;
-
-  nargs = call_expr_nargs (call);
-
-  /* This reverses the evaluation order. This is a desired effect. */
-  for (i = 0, cmp = NULL_TREE; i < nargs; i++)
-   {
- tree arg = CALL_EXPR_ARG (call, i);
- /* Promote types smaller than integer.  This is required by
-some ABIs.  */
- tree type = TREE_TYPE (arg);
- tree saved;
- if (targetm.calls.promote_prototypes (type)
- && INTEGRAL_TYPE_P (type)
- && INT_CST_LT_UNSIGNED (TYPE_SIZE (type),
- TYPE_SIZE (integer_type_node)))
-   arg = fold_convert (integer_type_node, arg);
-
- saved = save_expr (force_evaluation_order (arg));
- cmp = (cmp == NULL_TREE ? saved :
-build2 (COMPOUND_EXPR, void_type_node, cmp, saved));
-
- CALL_EXPR_ARG (call, i) = saved;
-   }
-
-  if (cmp && TREE_CODE (cmp) == COMPOUND_EXPR)
-   TREE_SIDE_EFFECTS (cmp) = 1;
-
-  if (cmp)
-   {
- cmp = build2 (COMPOUND_EXPR, TREE_TYPE (node), cmp, node);
- if (TREE_TYPE (cmp) != void_type_node)
-   cmp = save_expr (cmp);
- TREE_SIDE_EFFECTS (cmp) = 1;
- node = cmp;
-   }
-}
-  return node;
-}
-
 /* Build a node to represent empty statements and blocks. */

 tree


Re: [patch] Add ATTRIBUTE_MEMALLOC to a few malloc functions in GCC

2012-07-11 Thread Richard Guenther
On Wed, Jul 11, 2012 at 11:41 AM, Steven Bosscher  wrote:
> Hello,
>
> One for under the "eat your own dog food" header.
>
> Bootstrapped and tested on powerpc64-unknown-linux-gnu.
> cc1 is a few bytes smaller. I expect the benefits to be more
> noticeable with an LTO-enabled bootstrap, but I haven't figured out
> yet how to do that :-)
>
> OK for trunk?

Hmm, we assume that the contents of the allocated memory is
undefined (zero content is ok practically).  So if there is no hidden
init code anywhere in those functions the patch is ok.

Thanks,
Richard.


> Ciao!
> Steven
>
> * ggc.h (ggc_internal_alloc_stat, ggc_alloc_typed_stat,
> ggc_internal_cleared_alloc_stat, ggc_cleared_alloc_htab_ignore_args,
> ggc_cleared_alloc_ptr_array_two_args, ggc_splay_alloc): Add
> ATTRIBUTE_MALLOC.
> * alloc-pool.h (pool_alloc): Likewise.


Re: [patch] Remove unused java/expr.c:force_evaluation_order

2012-07-11 Thread Andrew Haley
On 07/11/2012 10:44 AM, Steven Bosscher wrote:
> Remove force_evaluation_order because the only caller is
> force_evaluation_order itself.
> Will commit after a few days, if no-one objects.

No problem.  I presume that its caller was removed because it wasn't
needed, but I don't quite know why it wasn't needed.  Perhaps it's a
side-effect of the Tree-SSA conversion.

Andrew.


Re: [patch] Add ATTRIBUTE_MEMALLOC to a few malloc functions in GCC

2012-07-11 Thread Steven Bosscher
On Wed, Jul 11, 2012 at 12:06 PM, Richard Guenther
 wrote:
> On Wed, Jul 11, 2012 at 11:41 AM, Steven Bosscher  
> wrote:
>> Hello,
>>
>> One for under the "eat your own dog food" header.
>>
>> Bootstrapped and tested on powerpc64-unknown-linux-gnu.
>> cc1 is a few bytes smaller. I expect the benefits to be more
>> noticeable with an LTO-enabled bootstrap, but I haven't figured out
>> yet how to do that :-)
>>
>> OK for trunk?
>
> Hmm, we assume that the contents of the allocated memory is
> undefined (zero content is ok practically).  So if there is no hidden
> init code anywhere in those functions the patch is ok.

Yes, I was careful to watch out for that. This is why e.g.
ggc_alloc_string_stat isn't an ATTRIBUTE_MALLOC function. The only
thing I wasn't sure about, is whether the poisoning of memory with
ENABLE_GC_CHECKING counts as undefined.

Ciao!
Steven


Re: [patch] Add ATTRIBUTE_MEMALLOC to a few malloc functions in GCC

2012-07-11 Thread Richard Guenther
On Wed, Jul 11, 2012 at 12:11 PM, Steven Bosscher  wrote:
> On Wed, Jul 11, 2012 at 12:06 PM, Richard Guenther
>  wrote:
>> On Wed, Jul 11, 2012 at 11:41 AM, Steven Bosscher  
>> wrote:
>>> Hello,
>>>
>>> One for under the "eat your own dog food" header.
>>>
>>> Bootstrapped and tested on powerpc64-unknown-linux-gnu.
>>> cc1 is a few bytes smaller. I expect the benefits to be more
>>> noticeable with an LTO-enabled bootstrap, but I haven't figured out
>>> yet how to do that :-)
>>>
>>> OK for trunk?
>>
>> Hmm, we assume that the contents of the allocated memory is
>> undefined (zero content is ok practically).  So if there is no hidden
>> init code anywhere in those functions the patch is ok.
>
> Yes, I was careful to watch out for that. This is why e.g.
> ggc_alloc_string_stat isn't an ATTRIBUTE_MALLOC function. The only
> thing I wasn't sure about, is whether the poisoning of memory with
> ENABLE_GC_CHECKING counts as undefined.

Yes, I suppose so.  The point is that there should be no way to
reproducibly create a valid pointer by combining data from that
undefined content.  Thus,

 char *mem = alloc ();
 int *ptr = mem[4] | (mem[7] + 3) << 8 | ...;

should never yield a pointer to an actual integer object.

  if (mem[4] == 3)
   ptr = &i;
  else
   ptr = &j;

is ok, thus only flow of actual data is what matters.  "Zero filling is ok"
may be overly optimistic for -fno-delete-null-pointer-checks targets where
zero is a valid object address though ...

Richard.

> Ciao!
> Steven


Re: new sign/zero extension elimination pass

2012-07-11 Thread Tom de Vries
On 13/11/10 10:50, Eric Botcazou wrote:
>> I profiled the pass on spec2000:
>>
>> -mabi=32 -mabi=64
>> ee-pass (usr time): 0.70 1.16
>> total   (usr time):   919.30   879.26
>> ee-pass(%): 0.08 0.13
>>
>> The pass takes 0.13% or less of the total usr runtime.
> 
> For how many hits?  What are the numbers with --param ee-max-propagate=0?
> 
>> Is it necessary to improve the runtime of this pass?
> 
> I've already given my opinion about the implementation.  The other passes in 
> the compiler try hard not to rescan everything when a single bit changes; as 
> currently written, yours doesn't.
> 

Eric,

I've done the following:
- refactored the pass such that it now scans at most twice over all
  instructions.
- updated the patch to be applicable to current trunk
- updated the motivating example to a more applicable one (as discussed in
  this thread), and added that one as test-case.
- added a part in the header comment illustrating the working of the pass
  on the motivating example.

bootstrapped and reg-tested on x86_64 and i686.

built and reg-tested on mips, mips64, and arm.

OK for trunk?

Thanks,
- Tom

2012-07-10  Tom de Vries  

* ee.c: New file.
* tree-pass.h (pass_ee): Declare.
* opts.c (default_options_table): Set flag_ee at -O2.
* timevar.def (TV_EE): New timevar.
* common.opt (fextension-elimination): New option.
* Makefile.in (ee.o): New rule.
* passes.c (pass_ee): Add it.

* gcc.dg/extend-1.c: New test.
* gcc.dg/extend-2.c: Same.
* gcc.dg/extend-2-64.c: Same.
* gcc.dg/extend-3.c: Same.
* gcc.dg/extend-4.c: Same.
* gcc.dg/extend-5.c: Same.
* gcc.target/mips/octeon-bbit-2.c: Make test more robust.
Index: gcc/tree-pass.h
===
--- gcc/tree-pass.h (revision 189409)
+++ gcc/tree-pass.h (working copy)
@@ -483,6 +483,7 @@ extern struct gimple_opt_pass pass_fixup
 
 extern struct rtl_opt_pass pass_expand;
 extern struct rtl_opt_pass pass_instantiate_virtual_regs;
+extern struct rtl_opt_pass pass_ee;
 extern struct rtl_opt_pass pass_rtl_fwprop;
 extern struct rtl_opt_pass pass_rtl_fwprop_addr;
 extern struct rtl_opt_pass pass_jump;
Index: gcc/testsuite/gcc.target/mips/octeon-bbit-2.c
===
--- gcc/testsuite/gcc.target/mips/octeon-bbit-2.c (revision 189409)
+++ gcc/testsuite/gcc.target/mips/octeon-bbit-2.c (working copy)
@@ -5,19 +5,19 @@
 /* { dg-final { scan-assembler "\tbnel\t" } } */
 /* { dg-final { scan-assembler-not "\tbne\t" } } */
 
-NOMIPS16 int
-f (int n, int i)
+NOMIPS16 long int
+f (long int n, long int i)
 {
-  int s = 0;
+  long int s = 0;
   for (; i & 1; i++)
 s += i;
   return s;
 }
 
-NOMIPS16 int
-g (int n, int i)
+NOMIPS16 long int
+g (long int n, long int i)
 {
-  int s = 0;
+  long int s = 0;
   for (i = 0; i < n; i++)
 s += i;
   return s;
Index: gcc/testsuite/gcc.dg/extend-4.c
===
--- /dev/null (new file)
+++ gcc/testsuite/gcc.dg/extend-4.c (revision 0)
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-ee" } */
+
+unsigned char f(unsigned int a, int c)
+{
+  unsigned int b = a;
+  if (c)
+b = a & 0x10ff;
+  return b;
+}
+
+/* { dg-final { scan-rtl-dump-times "_extend:" 1 "ee" { target mips*-*-* } } } */
+/* { dg-final { scan-rtl-dump-times "and:" 0 "ee" { target mips*-*-* } } } */
+/* { dg-final { scan-rtl-dump "redundant extension \[0-9\]+ removed" "ee" { target mips*-*-* } } } */
+/* { dg-final { cleanup-rtl-dump "ee" } } */
+
Index: gcc/testsuite/gcc.dg/extend-1.c
===
--- /dev/null (new file)
+++ gcc/testsuite/gcc.dg/extend-1.c (revision 0)
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-ee" } */
+
+void f(unsigned char * p, short s, int c, int *z)
+{
+  if (c)
+*z = 0;
+  *p ^= (unsigned char)s;
+}
+
+/* { dg-final { scan-rtl-dump-times "sign_extend:" 0 "ee" { target mips*-*-* } } } */
+/* { dg-final { scan-rtl-dump-times "redundant extension \[0-9\]+ replaced" 1 "ee" { target mips*-*-* } } } */
+/* { dg-final { cleanup-rtl-dump "ee" } } */
Index: gcc/testsuite/gcc.dg/extend-5.c
===
--- /dev/null (new file)
+++ gcc/testsuite/gcc.dg/extend-5.c (revision 0)
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-ee" } */
+
+void f (short d[2][2])
+{
+  int d0 = d[0][0] + d[0][1];
+  int d1 = d[1][0] + d[1][1];
+  d[0][0] = d0 + d1;
+  d[0][1] = d0 - d1;
+}
+
+/* { dg-final { scan-rtl-dump-times "redundant extension \[0-9\]+ replaced" 2 "ee" { target mips*-*-* } } } */
+/* { dg-final { cleanup-rtl-dump "ee" } } */
Index: gcc/testsuite/gcc.dg/extend-2.c

Re: [PATCH] Fix breakage for m68k-linux introduced by 4a020a8 / r189359

2012-07-11 Thread Jan-Benedict Glaw
On Wed, 2012-07-11 08:44:33 +0200, Steven Bosscher  
wrote:
> On Wed, Jul 11, 2012 at 7:49 AM, Jan-Benedict Glaw  wrote:
> > Hi!
> >
> > Git revision 4a020a8 [aka. SVN 189359], the large header reordering patch,
> > broke m68k-linux (.../configure --target=m68k-linux --prefix=...
> > --enable-languages=c --disable-threads) for me:
> >
> > [...]
> > gcc -c   -g -O2 -DIN_GCC -DCROSS_DIRECTORY_STRUCTURE  -W -Wall 
> > -Wno-narrowing -Wwrite-strings -Wcast-qual -Wstrict-prototypes 
> > -Wmissing-prototypes -Wmissing-format-attribute -pedantic -Wno
> > -long-long -Wno-variadic-macros -Wno-overlength-strings 
> > -Wold-style-definition -Wc++-compat -fno-common  -DHAVE_CONFIG_H -I. -I. 
> > -I../../../../gcc/gcc -I../../../../gcc/gcc/. 
> > -I../../../../gcc/gcc/../include -I../../../../gcc/gcc/../libcpp/include  
> > -I../../../../gcc/gcc/../libdecnumber 
> > -I../../../../gcc/gcc/../libdecnumber/dpd -I../libdecnumber
> > ../../../../gcc/gcc/resource.c -o resource.o
> > ../../../../gcc/gcc/resource.c: In function ‘init_resource_info’:
> > ../../../../gcc/gcc/resource.c:1179:5: error: ‘current_function_decl’ 
> > undeclared (first use in this function)
> > ../../../../gcc/gcc/resource.c:1179:5: note: each undeclared identifier is 
> > reported only once for each function it appears in
> > make[2]: *** [resource.o] Error 1
[...]
> Did your build already include
> http://gcc.gnu.org/ml/gcc-patches/2012-07/msg00378.html ?

No, this was fixed in parallel with my observation, with an almost
identical patch :)

Thanks for the work!

MfG, JBG

-- 
  Jan-Benedict Glaw  jbg...@lug-owl.de  +49-172-7608481
Signature of:  Alles sollte so einfach wie möglich gemacht sein.
the second  :  Aber nicht einfacher.  (Einstein)


signature.asc
Description: Digital signature


Fix PR53908

2012-07-11 Thread Bernd Schmidt
We're moving a load across a call since we don't recognize calls as
memory-clobbering.

Bootstrapping and testing now on 4.7 x86_64-linux, ok everywhere?


Bernd
	PR rtl-optimization/53908
	* df-problems.c (can_move_insns_across): Calls can clobber memory.

Index: gcc/df-problems.c
===
--- gcc/df-problems.c	(revision 189425)
+++ gcc/df-problems.c	(working copy)
@@ -3961,6 +3961,11 @@ can_move_insns_across (rtx from, rtx to,
 
   for (insn = across_to; ; insn = next)
 {
+  if (CALL_P (insn))
+	{
+	  memrefs_in_across |= MEMREF_VOLATILE;
+	  mem_sets_in_across |= MEMREF_VOLATILE;
+	}
   if (NONDEBUG_INSN_P (insn))
 	{
 	  memrefs_in_across |= for_each_rtx (&PATTERN (insn), find_memory,


Re: new sign/zero extension elimination pass

2012-07-11 Thread Jakub Jelinek
On Wed, Jul 11, 2012 at 12:30:12PM +0200, Tom de Vries wrote:
> I've done the following:
> - refactored the pass such that it now scans at most twice over all
>   instructions.
> - updated the patch to be applicable to current trunk
> - updated the motivating example to a more applicable one (as discussed in
>   this thread), and added that one as test-case.
> - added a part in the header comment illustrating the working of the pass
>   on the motivating example.
> 
> bootstrapped and reg-tested on x86_64 and i686.
> 
> build and reg-tested on mips, mips64, and arm.

How does this relate to the ree.c pass we already have?
Why is not REE sufficient for you?  Having two different zero/sign extension
elimination passes would be really weird.

Jakub


Re: C++ PATCH for c++/53733 (DR 1402, deleting move ctor)

2012-07-11 Thread Jason Merrill

On 07/10/2012 02:04 AM, Jason Merrill wrote:

Apparently we need to implement DR 1402 in 4.7 in order to fix the
std::pair ABI breakage properly.  So here it is: if overload resolution
chooses a non-trivial copy constructor, instead of causing the move
constructor to be deleted, we just don't implicitly declare it.


This is an incomplete implementation of (the current proposed resolution 
of) DR 1402, which also changes the conditions when a virtual base 
interferes with an implicitly-declared move assignment operator.  This 
patch implements that, as well as some code cleanup.


Tested x86_64-pc-linux-gnu, applying to trunk.
commit 4295ec42ce03f2a4283c0e82d5cbdce681a68efa
Author: Jason Merrill 
Date:   Tue Jul 10 18:19:28 2012 +0200

	DR 1402
	* method.c (synthesized_method_walk): Replace uses of msg with diag.
	Correct handling of virtual bases with move operations.
	(process_subob_fn, walk_field_subobs): Replace uses of msg with diag.

diff --git a/gcc/cp/method.c b/gcc/cp/method.c
index 79edf81..f3fd7b8 100644
--- a/gcc/cp/method.c
+++ b/gcc/cp/method.c
@@ -923,7 +923,7 @@ get_copy_assign (tree type)
 static void
 process_subob_fn (tree fn, bool move_p, tree *spec_p, bool *trivial_p,
 		  bool *deleted_p, bool *constexpr_p, bool *no_implicit_p,
-		  const char *msg, tree arg)
+		  bool diag, tree arg)
 {
   if (!fn || fn == error_mark_node)
 goto bad;
@@ -943,7 +943,7 @@ process_subob_fn (tree fn, bool move_p, tree *spec_p, bool *trivial_p,
 	{
 	  if (deleted_p)
 	*deleted_p = true;
-	  if (msg)
+	  if (diag)
 	error ("union member %q+D with non-trivial %qD", arg, fn);
 	}
 }
@@ -956,7 +956,7 @@ process_subob_fn (tree fn, bool move_p, tree *spec_p, bool *trivial_p,
   if (constexpr_p && !DECL_DECLARED_CONSTEXPR_P (fn))
 {
   *constexpr_p = false;
-  if (msg)
+  if (diag)
 	{
 	  inform (0, "defaulted constructor calls non-constexpr "
 		  "%q+D", fn);
@@ -979,7 +979,7 @@ walk_field_subobs (tree fields, tree fnname, special_function_kind sfk,
 		   int quals, bool copy_arg_p, bool move_p,
 		   bool assign_p, tree *spec_p, bool *trivial_p,
 		   bool *deleted_p, bool *constexpr_p, bool *no_implicit_p,
-		   const char *msg, int flags, tsubst_flags_t complain)
+		   bool diag, int flags, tsubst_flags_t complain)
 {
   tree field;
   for (field = fields; field; field = DECL_CHAIN (field))
@@ -996,13 +996,13 @@ walk_field_subobs (tree fields, tree fnname, special_function_kind sfk,
 	  bool bad = true;
 	  if (CP_TYPE_CONST_P (mem_type) && !CLASS_TYPE_P (mem_type))
 	{
-	  if (msg)
+	  if (diag)
 		error ("non-static const member %q#D, can%'t use default "
 		   "assignment operator", field);
 	}
 	  else if (TREE_CODE (mem_type) == REFERENCE_TYPE)
 	{
-	  if (msg)
+	  if (diag)
 		error ("non-static reference member %q#D, can%'t use "
 		   "default assignment operator", field);
 	}
@@ -1018,7 +1018,7 @@ walk_field_subobs (tree fields, tree fnname, special_function_kind sfk,
 
 	  if (DECL_INITIAL (field))
 	{
-	  if (msg && DECL_INITIAL (field) == error_mark_node)
+	  if (diag && DECL_INITIAL (field) == error_mark_node)
 		inform (0, "initializer for %q+#D is invalid", field);
 	  if (trivial_p)
 		*trivial_p = false;
@@ -1041,14 +1041,14 @@ walk_field_subobs (tree fields, tree fnname, special_function_kind sfk,
 	  if (CP_TYPE_CONST_P (mem_type)
 	  && default_init_uninitialized_part (mem_type))
 	{
-	  if (msg)
+	  if (diag)
 		error ("uninitialized non-static const member %q#D",
 		   field);
 	  bad = true;
 	}
 	  else if (TREE_CODE (mem_type) == REFERENCE_TYPE)
 	{
-	  if (msg)
+	  if (diag)
 		error ("uninitialized non-static reference member %q#D",
 		   field);
 	  bad = true;
@@ -1064,7 +1064,7 @@ walk_field_subobs (tree fields, tree fnname, special_function_kind sfk,
 	  && TREE_CODE (DECL_CONTEXT (field)) != UNION_TYPE)
 	{
 	  *constexpr_p = false;
-	  if (msg)
+	  if (diag)
 		inform (0, "defaulted default constructor does not "
 			"initialize %q+#D", field);
 	}
@@ -1078,7 +1078,7 @@ walk_field_subobs (tree fields, tree fnname, special_function_kind sfk,
 	  walk_field_subobs (TYPE_FIELDS (mem_type), fnname, sfk, quals,
 			 copy_arg_p, move_p, assign_p, spec_p, trivial_p,
 			 deleted_p, constexpr_p, no_implicit_p,
-			 msg, flags, complain);
+			 diag, flags, complain);
 	  continue;
 	}
 
@@ -1095,7 +1095,7 @@ walk_field_subobs (tree fields, tree fnname, special_function_kind sfk,
   rval = locate_fn_flags (mem_type, fnname, argtype, flags, complain);
 
   process_subob_fn (rval, move_p, spec_p, trivial_p, deleted_p,
-			constexpr_p, no_implicit_p, msg, field);
+			constexpr_p, no_implicit_p, diag, field);
 }
 }
 
@@ -1116,7 +1116,6 @@ synthesized_method_walk (tree ctype, special_function_kind sfk, bool const_p,
   VEC(tree,gc) *vbases;
   int i, quals, flags;
   tsubst_flags_t comp

Re: new sign/zero extension elimination pass

2012-07-11 Thread Tom de Vries
On 11/07/12 13:41, Jakub Jelinek wrote:
> On Wed, Jul 11, 2012 at 12:30:12PM +0200, Tom de Vries wrote:
>> I've done the following:
>> - refactored the pass such that it now scans at most twice over all
>>   instructions.
>> - updated the patch to be applicable to current trunk
>> - updated the motivating example to a more applicable one (as discussed in
>>   this thread), and added that one as test-case.
>> - added a part in the header comment illustrating the working of the pass
>>   on the motivating example.
>>
>> bootstrapped and reg-tested on x86_64 and i686.
>>
>> build and reg-tested on mips, mips64, and arm.
> 
> How does this relate to the ree.c pass we already have?
> Why is not REE sufficient for you?  Having two different zero/sign extension
> elimination passes would be really weird.
> 

Jakub,

pass_ree eliminates extensions by merging an extension with the definitions of
the register it extends. So pass_ree can perhaps be described as an inter-bb
combiner that is targeted at extensions.

AFAIU there is no analysis in pass_ree that concludes that an extension is
redundant. If it manages to combine an extension with all the insns feeding into
the extension, the extension has been made redundant.
Both redundant and non-redundant extensions can be eliminated by pass_ree.

pass_ee does an analysis of what parts of registers are used, and concludes
based on that analysis that an extension is redundant, meaning it can be
replaced by a regcopy without changing the semantics of the program.

We currently describe pass_ree as 'redundant extension elimination',
and pass_ee as 'extension elimination'.
Perhaps 'redundant extension elimination' is a more appropriate name for pass_ee
and pass_ree is better described as 'extension combiner'.

In the motivating example of the pass, there are 2 extensions. Those extensions
cannot be combined into the insns feeding into them (or into the insns they feed
into). They are redundant however, something which is analyzed by pass_ee. It
replaces the 2 extensions with regcopies, and in the resulting assembly the 2
redundant extensions are removed.

Thanks,
- Tom

>   Jakub
> 




[testsuite] Allow for / comments in g++.dg/debug/dwarf2/pubnames-2.C

2012-07-11 Thread Rainer Orth
g++.dg/debug/dwarf2/pubnames-2.C currently FAILs on Solaris/x86 since
comments start with /, not # for both Sun as and gas.  The following
patch fixes this by allowing the alternate comment character.

Tested with the appropriate runtest invocation on i386-pc-solaris2.11
and x86_64-unknown-linux-gnu, installed on mainline.

Rainer


2012-07-11  Rainer Orth  

* g++.dg/debug/dwarf2/pubnames-2.C: Allow for / comments.

# HG changeset patch
# Parent 50c8f2aefc6964a7b981ed52d3384bbfbc40b1e7
Allow for / comments in g++.dg/debug/dwarf2/pubnames-2.C

diff --git a/gcc/testsuite/g++.dg/debug/dwarf2/pubnames-2.C b/gcc/testsuite/g++.dg/debug/dwarf2/pubnames-2.C
--- a/gcc/testsuite/g++.dg/debug/dwarf2/pubnames-2.C
+++ b/gcc/testsuite/g++.dg/debug/dwarf2/pubnames-2.C
@@ -1,63 +1,63 @@
 // { dg-do compile }
 // { dg-options "-gpubnames -gdwarf-4 -std=c++0x -dA" }
 // { dg-final { scan-assembler ".section\t.debug_pubnames" } }
-// { dg-final { scan-assembler "\"\\(anonymous namespace\\)0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"one0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"one::G_A0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"one::G_B0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"one::G_C0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"one::\\(anonymous namespace\\)0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"two0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"F_A0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"F_B0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"F_C0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"inline_func_10\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"one::c1::c10\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"one::c1::~c10\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"one::c1::val0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"check_enum0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"main0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"two::c2::c20\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"two::c2::c20\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"two::c2::c20\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"check0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"check \\>0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"check \\>0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"check \\>0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"two::c2::val0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"two::c2::val0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"two::c2::val0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"__static_initialization_and_destruction_00\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"two::c2::~c20\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"two::c2::~c20\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"two::c2::~c20\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"_GLOBAL__sub_I__ZN3one3c1vE0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"anonymous_union_var0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"two::ci0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"two::c2v10\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"two::c2v20\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"two::c2v30\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"one::c1v0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"one::\\(anonymous namespace\\)::one_anonymous_var0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"\\(anonymous namespace\\)::c1_count0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"\\(anonymous namespace\\)::c2_count0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"\\(anonymous namespace\\)::three0\"+\[ \t\]+\[#;]+\[ \t\]+external name" } }
-// { dg-final { scan-assembler "\"\\(an

Re: [testsuite] Allow for / comments in g++.dg/debug/dwarf2/pubnames-2.C

2012-07-11 Thread Andreas Schwab
Rainer Orth  writes:

> g++.dg/debug/dwarf2/pubnames-2.C currently FAILs on Solaris/x86 since
> comments start with /, not # for both Sun as and gas.  The following
> patch fixes this by allowing the alternate comment character.

Do we have to repeat this for every new test that scans the assembler
output?

>   * g++.dg/debug/dwarf2/pubnames-2.C: Allow for / comments.

Please also add | for m68k and @ for arm.  Don't you want ! as well?

Andreas.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."


Re: [testsuite] Allow for / comments in g++.dg/debug/dwarf2/pubnames-2.C

2012-07-11 Thread Mike Stump

On Jul 11, 2012, at 9:30 AM, Andreas Schwab wrote:

> Rainer Orth  writes:
> 
>> g++.dg/debug/dwarf2/pubnames-2.C currently FAILs on Solaris/x86 since
>> comments start with /, not # for both Sun as and gas.  The following
>> patch fixes this by allowing the alternate comment character.
> 
> Do we have to repeat this for every new test that scans the assembler
> output?
> 
>>  * g++.dg/debug/dwarf2/pubnames-2.C: Allow for / comments.
> 
> Please also add | for m68k and @ for arm.  Don't you want ! as well?

I've been known to use . (regexp for any character) in patterns, on the grounds 
that all ports known to me use a single character for the comment character, 
but, vms is likely to use something I can't predict.  :-)



[PATCH] New fdo summary-based icache sensitive unrolling (issue6351086)

2012-07-11 Thread Teresa Johnson
Ports some patches related to improving FDO program summary information
and using it to guide loop unrolling from google branches to mainline.
The patch is enhanced to add additional summary information to aid
in determining hot/cold decisions.

The original patch description is at:
  http://gcc.gnu.org/ml/gcc-patches/2012-06/msg00437.html
and further discussion about incorporating into mainline is at:
  http://gcc.gnu.org/ml/gcc-patches/2012-06/threads.html#00414

Honza, can you take a look to see if this patch meets your needs?

Full description:

This patch adds new program summary information to the gcov
profile files that indicate how many profiled counts compose
the majority of the program's execution time. This is used to
provide an indication of the overall code size of the program.

The new profile summary information is then used to guide
codesize based unroll and peel decisions, to prevent those
optimizations from increasing code size too much when the
program may be sensitive to icache effects.

This patch also pulls in dependent portions of google/main r187660 that cache
additional loop analysis results in the niter_desc auxiliary information
hanging off the loop structure (the optimization portions of that
change are not included here, and have an outstanding review request
for mainline).

Bootstrapped and tested on x86_64-unknown-linux-gnu. Ok for trunk?

Thanks,
Teresa

2012-07-11  Teresa Johnson  

* libgcc/libgcov.c (sort_by_reverse_gcov_value): New function.
(gcov_compute_cutoff_values): Ditto.
(gcov_exit): Call gcov_compute_cutoff_values and merge new summary
information.
* gcc/doc/invoke.texi (roll much): Document new options
-fpeel-codesize-limit and -funroll-codesize-limit, and new params
codesize-hotness-threshold and unrollpeel-hotness-threshold.
* gcc/gcov-io.c (gcov_write_summary): Write new summary info.
(gcov_read_summary): Read new summary info.
* gcc/gcov-io.h (GCOV_TAG_SUMMARY_LENGTH): Update for new summary info.
(struct gcov_ctr_summary): Add new summary info: num_hot_counters and
hot_cutoff_value.
* gcc/loop-unroll.c (code_size_limit_factor): New function.
(decide_unroll_runtime_iterations): Call code_size_limit_factor
to control the unroll factor, and retrieve number of branches from
niter_desc instead of via function that walks loop.
(decide_peel_simple, decide_unroll_stupid): Ditto.
* gcc/coverage.c (read_counts_file): Propagate new summary info.
* gcc/loop-iv.c (get_simple_loop_desc): Invoke new analyze_loop_insns
function, and add guards to enable this function to work for the
outermost loop.
* gcc/common.opt: Add -fpeel-codesize-limit and
-funroll-codesize-limit.
* gcc/cfgloop.c (insn_has_fp_set, analyze_loop_insns): New functions.
(num_loop_branches): Remove.
* gcc/cfgloop.h (struct niter_desc): Added new fields to cache
additional loop analysis information.
(num_loop_branches): Remove.
(analyze_loop_insns): Declare.
* gcc/params.def (PARAM_UNROLLPEEL_CODESIZE_THRESHOLD): Add.
(PARAM_UNROLLPEEL_HOTNESS_THRESHOLD): Ditto.
* gcc/gcov-dump.c (tag_summary): Dump new summary info.

Index: libgcc/libgcov.c
===
--- libgcc/libgcov.c(revision 189413)
+++ libgcc/libgcov.c(working copy)
@@ -276,6 +276,120 @@ gcov_version (struct gcov_info *ptr, gcov_unsigned
   return 1;
 }
 
+/* Used by qsort to sort gcov values in descending order.  */
+
+static int
+sort_by_reverse_gcov_value (const void *pa, const void *pb)
+{
+  const gcov_type a = *(gcov_type const *)pa;
+  const gcov_type b = *(gcov_type const *)pb;
+
+  if (b > a)
+return 1;
+  else if (b == a)
+return 0;
+  else
+return -1;
+}
+
+/* Determines the number of counters required to cover a given percentage
+   of the total sum of execution counts in the summary, which is then also
+   recorded in SUM.  */
+
+static void
+gcov_compute_cutoff_values (struct gcov_summary *sum)
+{
+  struct gcov_info *gi_ptr;
+  const struct gcov_fn_info *gfi_ptr;
+  const struct gcov_ctr_info *ci_ptr;
+  struct gcov_ctr_summary *cs_ptr;
+  unsigned t_ix, f_ix, i, ctr_info_ix, index;
+  gcov_unsigned_t c_num;
+  gcov_type *value_array;
+  gcov_type cum, cum_cutoff;
+  char *cutoff_str;
+  unsigned cutoff_perc;
+
+#define CUM_CUTOFF_PERCENT_TIMES_10 999
+  cutoff_str = getenv ("GCOV_HOTCODE_CUTOFF_TIMES_10");
+  if (cutoff_str && strlen (cutoff_str))
+cutoff_perc = atoi (cutoff_str);
+  else
+cutoff_perc = CUM_CUTOFF_PERCENT_TIMES_10;
+
+  /* This currently only applies to arc counters.  */
+  t_ix = GCOV_COUNTER_ARCS;
+
+  /* First check if there are any counts recorded for this counter.  */
+  cs_ptr = &(sum->ctrs[t_ix]);
+  if (!cs_ptr->num)
+return;
+
+  /* Determine the cumulative counter val

Re: Fix PR53908

2012-07-11 Thread Steven Bosscher
On Wed, Jul 11, 2012 at 1:24 PM, Bernd Schmidt  wrote:
> We're moving a load across a call since we don't recognize calls as
> memory-clobbering.
>
> Bootstrapping and testing now on 4.7 x86_64-linux, ok everywhere?

Maybe:
+  if (CALL_P (insn)
+ && ! RTL_CONST_OR_PURE_CALL_P (insn))

?

Ciao!
Steven


[patch] Call free_after_parsing earlier

2012-07-11 Thread Steven Bosscher
Hello,

GCC calls free_after_parsing in rest_of_clean_state.
That's way too late, it can be done in free_lang_data_in_cgraph instead.

While there, I noticed a silly loop in final.c, and cleaned that up too.

Bootstrapped&tested on x86_64-unknown-linux-gnu. OK for trunk?

Ciao!
Steven

* final.c (final): Don't loop to find max_uid.
(rest_of_clean_state): Don't call free_after_parsing here.
* tree.c (free_lang_data_in_cgraph): Call free_after_parsing here.

Index: final.c
===
--- final.c (revision 189423)
+++ final.c (working copy)
@@ -1826,7 +1826,6 @@ void
 final (rtx first, FILE *file, int optimize_p)
 {
   rtx insn, next;
-  int max_uid = 0;
   int seen = 0;

   /* Used for -dA dump.  */
@@ -1837,11 +1836,9 @@ final (rtx first, FILE *file, int optimi

   last_ignored_compare = 0;

+#ifdef HAVE_cc0
   for (insn = first; insn; insn = NEXT_INSN (insn))
 {
-  if (INSN_UID (insn) > max_uid)   /* Find largest UID.  */
-   max_uid = INSN_UID (insn);
-#ifdef HAVE_cc0
   /* If CC tracking across branches is enabled, record the insn which
 jumps to each branch only reached from one place.  */
   if (optimize_p && JUMP_P (insn))
@@ -1852,8 +1849,8 @@ final (rtx first, FILE *file, int optimi
  LABEL_REFS (lab) = insn;
}
}
-#endif
 }
+#endif

   init_recog ();

@@ -4500,7 +4497,6 @@ rest_of_clean_state (void)
   init_recog_no_volatile ();

   /* We're done with this function.  Free up memory if we can.  */
-  free_after_parsing (cfun);
   free_after_compilation (cfun);
   return 0;
 }
Index: tree.c
===
--- tree.c  (revision 189423)
+++ tree.c  (working copy)
@@ -5167,16 +5167,19 @@ assign_assembler_name_if_neeeded (tree t


 /* Free language specific information for every operand and expression
-   in every node of the call graph.  This process operates in three stages:
+   in every node of the call graph.  This process operates in four stages:

-   1- Every callgraph node and varpool node is traversed looking for
+   1- Every function is traversed to free any front-end specific
+  data hung from the function's struct function->language.
+
+   2- Every callgraph node and varpool node is traversed looking for
   decls and types embedded in them.  This is a more exhaustive
   search than that done by find_referenced_vars, because it will
   also collect individual fields, decls embedded in types, etc.

-   2- All the decls found are sent to free_lang_data_in_decl.
+   3- All the decls found are sent to free_lang_data_in_decl.

-   3- All the types found are sent to free_lang_data_in_type.
+   4- All the types found are sent to free_lang_data_in_type.

The ordering between decls and types is important because
free_lang_data_in_decl sets assembler names, which includes
@@ -5193,6 +5196,10 @@ free_lang_data_in_cgraph (void)
   unsigned i;
   alias_pair *p;

+  /* Clear out function->language.  */
+  FOR_EACH_FUNCTION (n)
+free_after_parsing (DECL_STRUCT_FUNCTION (n->symbol.decl));
+
   /* Initialize sets and arrays to store referenced decls and types.  */
   fld.pset = pointer_set_create ();
   fld.worklist = NULL;


[patch] Make make_debug_insn_raw and make_jump_insn_raw static

2012-07-11 Thread Steven Bosscher
Hello,

They're defined and only used in emit-rtl.c so they can be static
(like make_call_insn_raw).

Bootstrapped&tested on x86_64-unknown-linux-gnu.
Will commit as obvious later this week if no-one objects.

Ciao!
Steven

* emit-rtl.c (make_debug_insn_raw, make_jump_insn_raw): Make static.
* rtl.h (make_debug_insn_raw, make_jump_insn_raw): Remove prototypes.

Index: emit-rtl.c
===
--- emit-rtl.c  (revision 189423)
+++ emit-rtl.c  (working copy)
@@ -149,7 +149,6 @@ static GTY ((if_marked ("ggc_marked_p"),
 #define cur_debug_insn_uid (crtl->emit.x_cur_debug_insn_uid)
 #define first_label_num (crtl->emit.x_first_label_num)

-static rtx make_call_insn_raw (rtx);
 static rtx change_address_1 (rtx, enum machine_mode, rtx, int);
 static void set_used_decls (tree);
 static void mark_label_nuses (rtx);
@@ -3692,7 +3691,7 @@ make_insn_raw (rtx pattern)

 /* Like `make_insn_raw' but make a DEBUG_INSN instead of an insn.  */

-rtx
+static rtx
 make_debug_insn_raw (rtx pattern)
 {
   rtx insn;
@@ -3713,7 +3712,7 @@ make_debug_insn_raw (rtx pattern)

 /* Like `make_insn_raw' but make a JUMP_INSN instead of an insn.  */

-rtx
+static rtx
 make_jump_insn_raw (rtx pattern)
 {
   rtx insn;
Index: rtl.h
===
--- rtl.h   (revision 189424)
+++ rtl.h   (working copy)
@@ -1777,8 +1777,6 @@ extern rtx emit_clobber (rtx);
 extern rtx gen_use (rtx);
 extern rtx emit_use (rtx);
 extern rtx make_insn_raw (rtx);
-extern rtx make_debug_insn_raw (rtx);
-extern rtx make_jump_insn_raw (rtx);
 extern void add_function_usage_to (rtx, rtx);
 extern rtx last_call_insn (void);
 extern rtx previous_insn (rtx);


Re: regrename creates invalid insn

2012-07-11 Thread Bernd Schmidt
On 03/26/2012 06:03 PM, Andreas Schwab wrote:
> Bernd Schmidt  writes:
> 
>> Does 4.7 still have the failure at all?
> 
> Yes, see PR52573.

Well, I still think having both REG_DEAD and REG_UNUSED for the same reg
is bogus, but fixing that causes trouble in reg-stack. It seems the path
of least resistance is to just cope with the situation in regrename.

The following seems to cure the problem with a 4.7 m68k cross. Also
bootstrapped and tested with -frename-registers enabled at -O2 on
x86_64-linux. Ok everywhere?


Bernd
	PR rtl-optimization/52573
	* regrename.c (build_def_use): Ignore REG_DEAD notes if there is a
	REG_UNUSED for the same register.

Index: regrename.c
===
--- regrename.c	(revision 189425)
+++ regrename.c	(working copy)
@@ -1718,7 +1718,8 @@ build_def_use (basic_block bb)
 
 	  /* Step 4: Close chains for registers that die here.  */
 	  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
-	if (REG_NOTE_KIND (note) == REG_DEAD)
+	if (REG_NOTE_KIND (note) == REG_DEAD
+		&& !find_regno_note (insn, REG_UNUSED, REGNO (XEXP (note, 0))))
 	  {
 		remove_from_hard_reg_set (&live_hard_regs,
 	  GET_MODE (XEXP (note, 0)),


Re: regrename creates invalid insn

2012-07-11 Thread Steven Bosscher
On Wed, Jul 11, 2012 at 11:31 PM, Bernd Schmidt  wrote:
> On 03/26/2012 06:03 PM, Andreas Schwab wrote:
>> Bernd Schmidt  writes:
>>
>>> Does 4.7 still have the failure at all?
>>
>> Yes, see PR52573.
>
> Well, I still think having both REG_DEAD and REG_UNUSED for the same reg
> is bogus, but fixing that causes trouble in reg-stack.

Oh? Before, during, or after reg-stack? IIRC reg-stack re-creates the
notes from scratch for the stack registers.

Is it possible to "filter" one of the notes (REG_DEAD or REG_UNUSED)
out in add_reg_note? IMHO you're right that having both is bogus...

Ciao!
Steven


Re: regrename creates invalid insn

2012-07-11 Thread Bernd Schmidt
On 07/12/2012 12:10 AM, Steven Bosscher wrote:
> On Wed, Jul 11, 2012 at 11:31 PM, Bernd Schmidt  
> wrote:
>> On 03/26/2012 06:03 PM, Andreas Schwab wrote:
>>> Bernd Schmidt  writes:
>>>
>>>> Does 4.7 still have the failure at all?
>>>
>>> Yes, see PR52573.
>>
>> Well, I still think having both REG_DEAD and REG_UNUSED for the same reg
>> is bogus, but fixing that causes trouble in reg-stack.
> 
> Oh? Before, during, or after reg-stack? IIRC reg-stack re-creates the
> notes from scratch for the stack registers.

During - it relies on the ones it finds. Search for a comment mentioning
fix_truncdi in a CLOBBER case.

> Is it possible to "filter" one of the notes (REG_DEAD or REG_UNUSED)
> out in add_reg_note? IMHO you're right that having both is bogus...

Not even necessary, there's already code in df-problems to avoid adding
REG_DEAD when it's already made a REG_UNUSED - but that code is
explicitly disabled for CLOBBERs. Go figure.


Bernd


Re: PR 51094 - fprint_w() in output_addr_const() reinstated

2012-07-11 Thread Hans-Peter Nilsson
On Mon, 9 Jul 2012, Dimitrios Apostolou wrote:
> Since output_addr_const() shows pretty hot in the compiler, I reinstated the
> fprint_w() call in place of fprintf().
>
> This patch relies on two things: 2's complement representation for negative
> int and that HOST_WIDE_INT is at least as large type as long for all platforms
> (will it always be?).
>
> Bootstrapped/tested on i386, regtested on x86_64 multilib, i386-pc-solaris2.10
> (thanks ro), i686-darwin9 (thanks iains).
>
>
> 2012-07-09 Dimitrios Apostolou 
>
> * final.c, output.h (fprint_w): New function to write a
> HOST_WIDE_INT to a file, fast.
> * final.c (output_addr_const): Use fprint_w() instead of
> fprintf().
> (sprint_ul_rev): New static function to write a
> HOST_WIDE_INT to a string in reverse, fast.

(Non-inlined patch, so I can't quote the patch in pine.)

Non-approver review: looks ok besides a few cases of comment
formatting nits: punctuation at end and two spaces: ".  */"
You had it right in the first sentence.  (The original wasn't
right either.)

Maybe also add a comment about how much faster than fprintf this
is, besides the ", fast".

brgds, H-P


Re: PR 51094 - fprint_w() in output_addr_const() reinstated

2012-07-11 Thread Mike Stump
On Jul 9, 2012, at 12:54 PM, Dimitrios Apostolou wrote:
> Since output_addr_const() shows pretty hot in the compiler, I reinstated the 
> fprint_w() call in place of fprintf().

My review bits...  First there is no guarantee that HOST_WIDE_INT_BITSIZE is 64 
or less, so [20] is unsafe longer term.  You can add an assert that it is 64 or 
less, that way it will be fixed when people bump things up.  long != 
HOST_WIDE_INT, so, that's a type violation, an assert for it would make it 
safer.  For performance, can't help but wonder if it would be faster to process 
the numbers in blocks of 9 digits, so that the wide divisions are fewer and 
farther between and for each one, you could then have 9 32-bit divisions which 
should be a bit cheaper; but, that's an issue for sprint_ul_rev, not your code.


Re: new sign/zero extension elimination pass

2012-07-11 Thread Kenneth Zadeck

Tom,

I have a problem with the approach that you have taken here.   I believe 
that this could be a very useful addition to gcc so I am in general very 
supportive, but i think you are missing an important case.


My problem is that the pass does not actually look at the target and 
make any decisions based on that target.


for instance, we have a llp64 target.   As with many targets, the target 
has a rich set of compare and branch instructions.  In particular, it 
can do both 32 and 64 bit comparisons.  We see that many of the 
upstream optimizations that take int (SI mode) index variables generate 
extension operations before doing 64 bit compare and branch 
instructions, even though there are 32 bit comparison and branches on 
the machine. There are a lot of machines that can do more than one 
size of comparison.


This optimization pass, as it is currently written will not remove those 
extensions because it believes that the length of the destination is the 
"final answer" unless it is wrapped in an explicit truncation.
Instead it needs to ask the port if there is a shorter compare and 
branch instruction that does not cost more.  In that case, those 
instructions should be rewritten to use the shorter compare and branch.


There are many operations other than compare and branch where the pass 
should be asking "can i shorten the target for free and therefore get 
rid of the extension?"   Right shifts, rotates, and stores are not in 
this class, but left shifts are, as are all comparisons, compare and 
branches, and conditional moves.   There may even be machines that have this 
for divide, but i do not know of any off the top of my head.


What I am suggesting moves this pass into the target-specific set of 
optimizations rather than the target-independent set, but given where this 
pass is to be placed, this is completely appropriate.  Any dest instruction 
where all of the operands have been extended should be checked to see if 
it was really necessary to use the longer form before doing the 
propagation pass.


kenny


On 07/11/2012 06:30 AM, Tom de Vries wrote:

On 13/11/10 10:50, Eric Botcazou wrote:

I profiled the pass on spec2000:

-mabi=32 -mabi=64
ee-pass (usr time): 0.70 1.16
total   (usr time):   919.30   879.26
ee-pass(%): 0.08 0.13

The pass takes 0.13% or less of the total usr runtime.

For how many hits?  What are the numbers with --param ee-max-propagate=0?


Is it necessary to improve the runtime of this pass?

I've already given my opinion about the implementation.  The other passes in
the compiler try hard not to rescan everything when a single bit changes; as
currently written, yours doesn't.


Eric,

I've done the following:
- refactored the pass such that it now scans at most twice over all
  instructions.
- updated the patch to be applicable to current trunk
- updated the motivating example to a more applicable one (as discussed in
  this thread), and added that one as test-case.
- added a part in the header comment illustrating the working of the pass
  on the motivating example.

bootstrapped and reg-tested on x86_64 and i686.

build and reg-tested on mips, mips64, and arm.

OK for trunk?

Thanks,
- Tom

2012-07-10  Tom de Vries  

* ee.c: New file.
* tree-pass.h (pass_ee): Declare.
* opts.c ( default_options_table): Set flag_ee at -O2.
* timevar.def (TV_EE): New timevar.
* common.opt (fextension-elimination): New option.
* Makefile.in (ee.o): New rule.
* passes.c (pass_ee): Add it.

* gcc.dg/extend-1.c: New test.
* gcc.dg/extend-2.c: Same.
* gcc.dg/extend-2-64.c: Same.
* gcc.dg/extend-3.c: Same.
* gcc.dg/extend-4.c: Same.
* gcc.dg/extend-5.c: Same.
* gcc.target/mips/octeon-bbit-2.c: Make test more robust.
Index: gcc/tree-pass.h
===
--- gcc/tree-pass.h (revision 189409)
+++ gcc/tree-pass.h (working copy)
@@ -483,6 +483,7 @@ extern struct gimple_opt_pass pass_fixup

extern struct rtl_opt_pass pass_expand;
extern struct rtl_opt_pass pass_instantiate_virtual_regs;
+extern struct rtl_opt_pass pass_ee;
extern struct rtl_opt_pass pass_rtl_fwprop;
extern struct rtl_opt_pass pass_rtl_fwprop_addr;
extern struct rtl_opt_pass pass_jump;
Index: gcc/testsuite/gcc.target/mips/octeon-bbit-2.c
===
--- gcc/testsuite/gcc.target/mips/octeon-bbit-2.c (revision 189409)
+++ gcc/testsuite/gcc.target/mips/octeon-bbit-2.c (working copy)
@@ -5,19 +5,19 @@
/* { dg-final { scan-assembler "\tbnel\t" } } */
/* { dg-final { scan-assembler-not "\tbne\t" } } */

-NOMIPS16 int
-f (int n, int i)
+NOMIPS16 long int
+f (long int n, long int i)
{
-  int s = 0;
+  long int s = 0;
   for (; i & 1; i++)
 s += i;
   return s;
}

-NOMIPS16 int
-g (int n, int i)
+NOMIPS16 long int
+g (long int n, long int i)
{
-  int 

Re: [PATCH] [LM32] Fix lm32-elf-gcc build error by remove unnecessary constant legitimate check.

2012-07-11 Thread Hans-Peter Nilsson
On Tue, 10 Jul 2012, Jia Liu wrote:
> Hi all,
>
> When I build lm32-elf-gcc, it failed at libgcc configure due to
> lm32-elf-cc1 segment fault when compile conftest.c:
>
> void bar ();
> void clean (int *);
> void foo ()
> {
>   int i __attribute__ ((cleanup (clean)));
>   bar();
> }
>
> Then I find lm32_legitimate_constant_p return false too much times, it
> shouldn't like this, I think.
>
> And I find the movsi pattern has handle the pic and reloc_operand, but
> lm32_legitimate_constant_p
> handle them again, so, I think maybe it is unnecessary.

The movsi pattern really is expected to handle it; the
TARGET_LEGITIMATE_CONSTANT_P (i.e. lm32_legitimate_constant_p)
is for immediate operands to *other* insns.  The movsi pattern
is expected to handle the rest.  The correct solution is
elsewhere.

> When I remove the unnecessary constant legitimate check, lm32-elf-gcc
> is built OK.

But most likely will fail to generate correct code for some
source codes for which it worked before.

A patch like this needs a full test-suite run anyway.

(Not an approver-review.)

brgds, H-P


[PATCH 0/6] Thread pointer built-in functions

2012-07-11 Thread Chung-Lin Tang
Hi, following discussion here:
http://gcc.gnu.org/ml/gcc-patches/2012-07/msg00229.html
(and a few other mails in the thread with Richard Sandiford)

A number of targets implement TLS builtins with the same names,
__builtin_thread_pointer() and __builtin_set_thread_pointer(). This set of
patches changes them into machine-independent builtins, implemented by
target hooks.

Currently, the backend interface does not seem to expose any notion of
TLS registers, hence these functions are not really implementable by the
expanders (the default target hooks are simply sorry()).

I have here the associated backend changes for all ports that have those
builtins implemented, namely alpha, arm, s390, xtensa, plus adding the
mips support which was the original reason I started this. I have CCed
the respective port maintainers in the rest of the patches.

It wasn't convenient for me to build/test the alpha, s390, and xtensa
ports, so please bear with me if you see anything wrong (the patches are
simply straightforward changes associated with the builtin).

Thanks,
Chung-Lin

Full ChangeLog:

2012-07-12  Chung-Lin Tang  

* targhooks.c (default_expand_builtin_thread_pointer): New.
(default_expand_builtin_set_thread_pointer): New.
* targhooks.h (default_expand_builtin_thread_pointer): New.
(default_expand_builtin_set_thread_pointer): New.
* target.def (expand_builtin_thread_pointer): New target hook.
(expand_builtin_set_thread_pointer): New target hook.
* builtins.c (expand_builtin_thread_pointer): New.
(expand_builtin_set_thread_pointer): New.
(expand_builtin): Add BUILT_IN_THREAD_POINTER,
BUILT_IN_SET_THREAD_POINTER expand cases.
* builtins.def (BUILT_IN_THREAD_POINTER):
New __builtin_thread_pointer builtin.
(BUILT_IN_SET_THREAD_POINTER):
New __builtin_set_thread_pointer builtin.
* doc/tm.texi.in: Add BUILT_IN_THREAD_POINTER,
BUILT_IN_SET_THREAD_POINTER hook entries.
* doc/tm.texi: Update.

* config/alpha/alpha.c (alpha_builtin): Remove
ALPHA_BUILTIN_THREAD_POINTER, ALPHA_BUILTIN_SET_THREAD_POINTER.
(code_for_builtin): Remove CODE_FOR_load_tp, CODE_FOR_set_tp.
(alpha_init_builtins): Remove __builtin_thread_pointer,
__builtin_set_thread_pointer machine-specific builtins.
(alpha_expand_builtin_thread_pointer): Add hook function for
TARGET_EXPAND_BUILTIN_THREAD_POINTER.
(alpha_expand_builtin_set_thread_pointer): Add hook function for
TARGET_EXPAND_BUILTIN_SET_THREAD_POINTER.
(alpha_fold_builtin): Remove ALPHA_BUILTIN_THREAD_POINTER,
ALPHA_BUILTIN_SET_THREAD_POINTER cases.

* config/s390/s390.c (s390_builtin,code_for_builtin_64,
code_for_builtin_31,s390_init_builtins,s390_expand_builtin):
Remove.
(s390_expand_builtin_thread_pointer): Add hook function for
TARGET_EXPAND_BUILTIN_THREAD_POINTER.
(s390_expand_builtin_set_thread_pointer): Add hook function for
TARGET_EXPAND_BUILTIN_SET_THREAD_POINTER.

* config/xtensa/xtensa.c
(xtensa_expand_builtin_thread_pointer): Add hook function for
TARGET_EXPAND_BUILTIN_THREAD_POINTER.
(xtensa_expand_builtin_set_thread_pointer): Add hook function
for TARGET_EXPAND_BUILTIN_SET_THREAD_POINTER.
(xtensa_builtin): Remove XTENSA_BUILTIN_THREAD_POINTER and
XTENSA_BUILTIN_SET_THREAD_POINTER.
(xtensa_init_builtins): Remove __builtin_thread_pointer,
__builtin_set_thread_pointer machine-specific builtins.
(xtensa_fold_builtin): Remove XTENSA_BUILTIN_THREAD_POINTER,
XTENSA_BUILTIN_SET_THREAD_POINTER cases.
(xtensa_expand_builtin): Remove XTENSA_BUILTIN_THREAD_POINTER,
XTENSA_BUILTIN_SET_THREAD_POINTER cases.

* config/arm/arm.c (arm_builtins): Remove
ARM_BUILTIN_THREAD_POINTER.
(arm_init_tls_builtins): Remove function.
(arm_init_builtins): Remove call to arm_init_tls_builtins().
(arm_expand_builtin): Remove ARM_BUILTIN_THREAD_POINTER case.
(arm_expand_builtin_thread_pointer): Add hook function for
TARGET_EXPAND_BUILTIN_THREAD_POINTER.

* config/mips/mips.c (mips_get_tp): Add 'target' parameter for
generating to specific reg.
(mips_legitimize_tls_address): Update calls to mips_get_tp().
(mips_expand_builtin_thread_pointer): Add hook function for
TARGET_EXPAND_BUILTIN_THREAD_POINTER.


[PATCH 1/6] Thread pointer built-in functions, core parts

2012-07-11 Thread Chung-Lin Tang
Core parts adding the new hooks. BUILT_IN_THREAD_POINTER and
BUILT_IN_SET_THREAD_POINTER are different hooks, as some targets only
implement one of them (thread pointer read).

Thanks,
Chung-Lin

* targhooks.c (default_expand_builtin_thread_pointer): New.
(default_expand_builtin_set_thread_pointer): New.
* targhooks.h (default_expand_builtin_thread_pointer): New.
(default_expand_builtin_set_thread_pointer): New.
* target.def (expand_builtin_thread_pointer): New target hook.
(expand_builtin_set_thread_pointer): New target hook.
* builtins.c (expand_builtin_thread_pointer): New.
(expand_builtin_set_thread_pointer): New.
(expand_builtin): Add BUILT_IN_THREAD_POINTER,
BUILT_IN_SET_THREAD_POINTER expand cases.
* builtins.def (BUILT_IN_THREAD_POINTER):
New __builtin_thread_pointer builtin.
(BUILT_IN_SET_THREAD_POINTER):
New __builtin_set_thread_pointer builtin.
* doc/tm.texi.in: Add BUILT_IN_THREAD_POINTER,
BUILT_IN_SET_THREAD_POINTER hook entries.
* doc/tm.texi: Update.
Index: target.def
===
--- target.def  (revision 189431)
+++ target.def  (working copy)
@@ -2668,6 +2668,22 @@ DEFHOOK
  enum unwind_info_type, (void),
  default_debug_unwind_info)
 
+/* Expand builtin function for returning TLS thread pointer.  */
+DEFHOOK
+(expand_builtin_thread_pointer,
+ "This hook expands the built-in function for reading\
+ the TLS thread pointer, if supported on the target.",
+ rtx, (rtx),
+ default_expand_builtin_thread_pointer)
+
+/* Expand builtin function for setting TLS thread pointer.  */
+DEFHOOK
+(expand_builtin_set_thread_pointer,
+ "This hook expands the built-in function for setting\
+ the TLS thread pointer, if supported on the target.",
+ void, (rtx),
+ default_expand_builtin_set_thread_pointer)
+
 DEFHOOKPOD
 (atomic_test_and_set_trueval,
  "This value should be set if the result written by\
Index: targhooks.c
===
--- targhooks.c (revision 189431)
+++ targhooks.c (working copy)
@@ -1456,4 +1456,17 @@ default_pch_valid_p (const void *data_p, size_t le
   return NULL;
 }
 
+rtx
+default_expand_builtin_thread_pointer (rtx target ATTRIBUTE_UNUSED)
+{
+  sorry ("__builtin_thread_pointer() not available for this target");
+  return NULL;
+}
+
+void
+default_expand_builtin_set_thread_pointer (rtx val ATTRIBUTE_UNUSED)
+{
+  sorry ("__builtin_set_thread_pointer() not available for this target");
+}
+
 #include "gt-targhooks.h"
Index: targhooks.h
===
--- targhooks.h (revision 189431)
+++ targhooks.h (working copy)
@@ -179,5 +179,8 @@ extern enum machine_mode default_get_reg_raw_mode(
 extern void *default_get_pch_validity (size_t *);
 extern const char *default_pch_valid_p (const void *, size_t);
 
+extern rtx default_expand_builtin_thread_pointer (rtx);
+extern void default_expand_builtin_set_thread_pointer (rtx);
+
 extern void default_asm_output_ident_directive (const char*);
 
Index: builtins.c
===
--- builtins.c  (revision 189431)
+++ builtins.c  (working copy)
@@ -5760,6 +5760,27 @@ expand_builtin_sync_synchronize (void)
   expand_mem_thread_fence (MEMMODEL_SEQ_CST);
 }
 
+static rtx
+expand_builtin_thread_pointer (tree exp, rtx target)
+{
+  if (!validate_arglist (exp, VOID_TYPE))
+return const0_rtx;
+  if (!REG_P (target) || GET_MODE (target) != Pmode)
+target = gen_reg_rtx (Pmode);
+  target = targetm.expand_builtin_thread_pointer (target);
+  return (target ? target : const0_rtx);
+}
+
+static void
+expand_builtin_set_thread_pointer (tree exp)
+{
+  rtx val;
+  if (!validate_arglist (exp, POINTER_TYPE, VOID_TYPE))
+return;
+  val = expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, Pmode, EXPAND_NORMAL);
+  targetm.expand_builtin_set_thread_pointer (val);
+}
+
 
 /* Expand an expression EXP that calls a built-in function,
with result going to TARGET if that's convenient
@@ -6825,6 +6846,13 @@ expand_builtin (tree exp, rtx target, rtx subtarge
maybe_emit_free_warning (exp);
   break;
 
+case BUILT_IN_THREAD_POINTER:
+  return expand_builtin_thread_pointer (exp, target);
+
+case BUILT_IN_SET_THREAD_POINTER:
+  expand_builtin_set_thread_pointer (exp);
+  return const0_rtx;
+
 default:   /* just do library call, if unknown builtin */
   break;
 }
Index: builtins.def
===
--- builtins.def(revision 189431)
+++ builtins.def(working copy)
@@ -782,6 +782,17 @@ DEF_BUILTIN (BUILT_IN_PROFILE_FUNC_ENTER, "__cyg_p
 DEF_BUILTIN (BUILT_IN_PROFILE_FUNC_EXIT, "__cyg_profile_func_exit", 
BUILT_IN_NORMAL, BT_FN_VOID_PTR_PTR, BT_LAST,
 false, false, false, ATTR_NULL, true, true)
 
+/* TLS thread po

[PATCH 2/6] Thread pointer built-in functions, alpha

2012-07-11 Thread Chung-Lin Tang
Alpha parts. Note that the machine-independent
__builtin_thread_pointer() is now marked as const/readonly, slightly
different from the original alpha backend code.

Thanks,
Chung-Lin

* config/alpha/alpha.c (alpha_builtin): Remove
ALPHA_BUILTIN_THREAD_POINTER, ALPHA_BUILTIN_SET_THREAD_POINTER.
(code_for_builtin): Remove CODE_FOR_load_tp, CODE_FOR_set_tp.
(alpha_init_builtins): Remove __builtin_thread_pointer,
__builtin_set_thread_pointer machine-specific builtins.
(alpha_expand_builtin_thread_pointer): Add hook function for
TARGET_EXPAND_BUILTIN_THREAD_POINTER.
(alpha_expand_builtin_set_thread_pointer): Add hook function for
TARGET_EXPAND_BUILTIN_SET_THREAD_POINTER.
(alpha_fold_builtin): Remove ALPHA_BUILTIN_THREAD_POINTER,
ALPHA_BUILTIN_SET_THREAD_POINTER cases.
Index: config/alpha/alpha.c
===
--- config/alpha/alpha.c(revision 189431)
+++ config/alpha/alpha.c(working copy)
@@ -6317,8 +6317,6 @@ enum alpha_builtin
   ALPHA_BUILTIN_AMASK,
   ALPHA_BUILTIN_IMPLVER,
   ALPHA_BUILTIN_RPCC,
-  ALPHA_BUILTIN_THREAD_POINTER,
-  ALPHA_BUILTIN_SET_THREAD_POINTER,
   ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
   ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER,
 
@@ -6374,8 +6372,6 @@ static enum insn_code const code_for_builtin[ALPHA
   CODE_FOR_builtin_amask,
   CODE_FOR_builtin_implver,
   CODE_FOR_builtin_rpcc,
-  CODE_FOR_load_tp,
-  CODE_FOR_set_tp,
   CODE_FOR_builtin_establish_vms_condition_handler,
   CODE_FOR_builtin_revert_vms_condition_handler,
 
@@ -6533,14 +6529,6 @@ alpha_init_builtins (void)
alpha_dimode_u, NULL_TREE);
   alpha_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins), ftype);
 
-  ftype = build_function_type_list (ptr_type_node, NULL_TREE);
-  alpha_builtin_function ("__builtin_thread_pointer", ftype,
- ALPHA_BUILTIN_THREAD_POINTER, ECF_NOTHROW);
-
-  ftype = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
-  alpha_builtin_function ("__builtin_set_thread_pointer", ftype,
- ALPHA_BUILTIN_SET_THREAD_POINTER, ECF_NOTHROW);
-
   if (TARGET_ABI_OPEN_VMS)
 {
   ftype = build_function_type_list (ptr_type_node, ptr_type_node,
@@ -6645,7 +6633,20 @@ alpha_expand_builtin (tree exp, rtx target,
 return const0_rtx;
 }
 
+static rtx
+alpha_expand_builtin_thread_pointer (rtx target)
+{
+  emit_insn (gen_load_tp (target));
+  return target;
+}
 
+static void
+alpha_expand_builtin_set_thread_pointer (rtx val)
+{
+  emit_insn (gen_set_tp (val));
+}
+
+
 /* Several bits below assume HWI >= 64 bits.  This should be enforced
by config.gcc.  */
 #if HOST_BITS_PER_WIDE_INT < 64
@@ -7077,8 +7078,6 @@ alpha_fold_builtin (tree fndecl, int n_args, tree
 case ALPHA_BUILTIN_AMASK:
 case ALPHA_BUILTIN_IMPLVER:
 case ALPHA_BUILTIN_RPCC:
-case ALPHA_BUILTIN_THREAD_POINTER:
-case ALPHA_BUILTIN_SET_THREAD_POINTER:
   /* None of these are foldable at compile-time.  */
 default:
   return NULL;
@@ -9811,6 +9810,11 @@ alpha_conditional_register_usage (void)
 #undef TARGET_EXPAND_BUILTIN_VA_START
 #define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start
 
+#undef TARGET_EXPAND_BUILTIN_THREAD_POINTER
+#define TARGET_EXPAND_BUILTIN_THREAD_POINTER alpha_expand_builtin_thread_pointer
+#undef TARGET_EXPAND_BUILTIN_SET_THREAD_POINTER
+#define TARGET_EXPAND_BUILTIN_SET_THREAD_POINTER alpha_expand_builtin_set_thread_pointer
+
 /* The Alpha architecture does not require sequential consistency.  See
http://www.cs.umd.edu/~pugh/java/memoryModel/AlphaReordering.html
for an example of how it can be violated in practice.  */


[PATCH 3/6] Thread pointer built-in functions, arm

2012-07-11 Thread Chung-Lin Tang
ARM parts, no further notes.

Thanks,
Chung-Lin

* config/arm/arm.c (arm_builtins): Remove
ARM_BUILTIN_THREAD_POINTER.
(arm_init_tls_builtins): Remove function.
(arm_init_builtins): Remove call to arm_init_tls_builtins().
(arm_expand_builtin): Remove ARM_BUILTIN_THREAD_POINTER case.
(arm_expand_builtin_thread_pointer): Add hook function for
TARGET_EXPAND_BUILTIN_THREAD_POINTER.
Index: config/arm/arm.c
===
--- config/arm/arm.c(revision 189431)
+++ config/arm/arm.c(working copy)
@@ -267,6 +267,8 @@ static int arm_cortex_a5_branch_cost (bool, bool);
 static bool arm_vectorize_vec_perm_const_ok (enum machine_mode vmode,
 const unsigned char *sel);
 
+static rtx arm_expand_builtin_thread_pointer (rtx);
+
 
 /* Table of machine attributes.  */
 static const struct attribute_spec arm_attribute_table[] =
@@ -617,6 +619,10 @@ static const struct attribute_spec arm_attribute_t
 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
   arm_vectorize_vec_perm_const_ok
 
+#undef TARGET_EXPAND_BUILTIN_THREAD_POINTER
+#define TARGET_EXPAND_BUILTIN_THREAD_POINTER \
+  arm_expand_builtin_thread_pointer
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 /* Obstack for minipool constant handling.  */
@@ -18980,8 +18986,6 @@ enum arm_builtins
 
   ARM_BUILTIN_WMERGE,
 
-  ARM_BUILTIN_THREAD_POINTER,
-
   ARM_BUILTIN_NEON_BASE,
 
   ARM_BUILTIN_MAX = ARM_BUILTIN_NEON_BASE + ARRAY_SIZE (neon_builtin_data)
@@ -20021,20 +20025,6 @@ arm_init_iwmmxt_builtins (void)
 }
 
 static void
-arm_init_tls_builtins (void)
-{
-  tree ftype, decl;
-
-  ftype = build_function_type (ptr_type_node, void_list_node);
-  decl = add_builtin_function ("__builtin_thread_pointer", ftype,
-  ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
-  NULL, NULL_TREE);
-  TREE_NOTHROW (decl) = 1;
-  TREE_READONLY (decl) = 1;
-  arm_builtin_decls[ARM_BUILTIN_THREAD_POINTER] = decl;
-}
-
-static void
 arm_init_fp16_builtins (void)
 {
   tree fp16_type = make_node (REAL_TYPE);
@@ -20046,8 +20036,6 @@ arm_init_fp16_builtins (void)
 static void
 arm_init_builtins (void)
 {
-  arm_init_tls_builtins ();
-
   if (TARGET_REALLY_IWMMXT)
 arm_init_iwmmxt_builtins ();
 
@@ -21150,9 +21138,6 @@ arm_expand_builtin (tree exp,
}
   return arm_expand_binop_builtin (icode, exp, target);
 
-case ARM_BUILTIN_THREAD_POINTER:
-  return arm_load_tp (target);
-
 default:
   break;
 }
@@ -26331,4 +26316,10 @@ arm_validize_comparison (rtx *comparison, rtx * op
 
 }
 
+static rtx
+arm_expand_builtin_thread_pointer (rtx target)
+{
+  return arm_load_tp (target);
+}
+
 #include "gt-arm.h"


[PATCH 4/6] Thread pointer built-in functions, s390

2012-07-11 Thread Chung-Lin Tang
S390 parts. In this patch, because the thread-pointer builtins were the
only machine-dependent builtins in the s390 backend, I have removed
basically all the init/expand builtin hook code. If the s390 maintainers
want to keep the code for possible future backend builtins, this patch
might need to be revised.

Also note that __builtin_thread_pointer is now marked as nothrow/const,
and __builtin_set_thread_pointer as nothrow, different from what I saw in
the original s390 code.

Thanks,
Chung-Lin

* config/s390/s390.c (s390_builtin,code_for_builtin_64,
code_for_builtin_31,s390_init_builtins,s390_expand_builtin):
Remove.
(s390_expand_builtin_thread_pointer): Add hook function for
TARGET_EXPAND_BUILTIN_THREAD_POINTER.
(s390_expand_builtin_set_thread_pointer): Add hook function for
TARGET_EXPAND_BUILTIN_SET_THREAD_POINTER.
Index: config/s390/s390.c
===
--- config/s390/s390.c  (revision 189431)
+++ config/s390/s390.c  (working copy)
@@ -9121,132 +9121,21 @@ s390_gimplify_va_arg (tree valist, tree type, gimp
   return build_va_arg_indirect_ref (addr);
 }
 
-
-/* Builtins.  */
-
-enum s390_builtin
+static rtx
+s390_expand_builtin_thread_pointer (rtx target)
 {
-  S390_BUILTIN_THREAD_POINTER,
-  S390_BUILTIN_SET_THREAD_POINTER,
+  emit_insn (TARGET_64BIT
+? gen_get_tp_64 (target) : gen_get_tp_31 (target));
+  return target;
+}
 
-  S390_BUILTIN_max
-};
-
-static enum insn_code const code_for_builtin_64[S390_BUILTIN_max] = {
-  CODE_FOR_get_tp_64,
-  CODE_FOR_set_tp_64
-};
-
-static enum insn_code const code_for_builtin_31[S390_BUILTIN_max] = {
-  CODE_FOR_get_tp_31,
-  CODE_FOR_set_tp_31
-};
-
 static void
-s390_init_builtins (void)
+s390_expand_builtin_set_thread_pointer (rtx val)
 {
-  tree ftype;
-
-  ftype = build_function_type_list (ptr_type_node, NULL_TREE);
-  add_builtin_function ("__builtin_thread_pointer", ftype,
-   S390_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
-   NULL, NULL_TREE);
-
-  ftype = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
-  add_builtin_function ("__builtin_set_thread_pointer", ftype,
-   S390_BUILTIN_SET_THREAD_POINTER, BUILT_IN_MD,
-   NULL, NULL_TREE);
+  emit_insn (TARGET_64BIT
+? gen_set_tp_64 (val) : gen_set_tp_31 (val));
 }
 
-/* Expand an expression EXP that calls a built-in function,
-   with result going to TARGET if that's convenient
-   (and in mode MODE if that's convenient).
-   SUBTARGET may be used as the target for computing one of EXP's operands.
-   IGNORE is nonzero if the value is to be ignored.  */
-
-static rtx
-s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
-enum machine_mode mode ATTRIBUTE_UNUSED,
-int ignore ATTRIBUTE_UNUSED)
-{
-#define MAX_ARGS 2
-
-  enum insn_code const *code_for_builtin =
-TARGET_64BIT ? code_for_builtin_64 : code_for_builtin_31;
-
-  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
-  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
-  enum insn_code icode;
-  rtx op[MAX_ARGS], pat;
-  int arity;
-  bool nonvoid;
-  tree arg;
-  call_expr_arg_iterator iter;
-
-  if (fcode >= S390_BUILTIN_max)
-internal_error ("bad builtin fcode");
-  icode = code_for_builtin[fcode];
-  if (icode == 0)
-internal_error ("bad builtin fcode");
-
-  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
-
-  arity = 0;
-  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
-{
-  const struct insn_operand_data *insn_op;
-
-  if (arg == error_mark_node)
-   return NULL_RTX;
-  if (arity > MAX_ARGS)
-   return NULL_RTX;
-
-  insn_op = &insn_data[icode].operand[arity + nonvoid];
-
-  op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
-
-  if (!(*insn_op->predicate) (op[arity], insn_op->mode))
-   op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
-  arity++;
-}
-
-  if (nonvoid)
-{
-  enum machine_mode tmode = insn_data[icode].operand[0].mode;
-  if (!target
- || GET_MODE (target) != tmode
- || !(*insn_data[icode].operand[0].predicate) (target, tmode))
-   target = gen_reg_rtx (tmode);
-}
-
-  switch (arity)
-{
-case 0:
-  pat = GEN_FCN (icode) (target);
-  break;
-case 1:
-  if (nonvoid)
-pat = GEN_FCN (icode) (target, op[0]);
-  else
-   pat = GEN_FCN (icode) (op[0]);
-  break;
-case 2:
-  pat = GEN_FCN (icode) (target, op[0], op[1]);
-  break;
-default:
-  gcc_unreachable ();
-}
-  if (!pat)
-return NULL_RTX;
-  emit_insn (pat);
-
-  if (nonvoid)
-return target;
-  else
-return const0_rtx;
-}
-
-
 /* Output assembly code for the trampoline template to
stdio stream FILE.
 
@@ -10689,10 +10578,10 @@ s390_loop_unroll_adjust (unsigned nunroll, struct
 #undef TA

[PATCH 5/6] Thread pointer built-in functions, xtensa

2012-07-11 Thread Chung-Lin Tang
xtensa parts. No other notes.

Thanks,
Chung-Lin

* config/xtensa/xtensa.c
(xtensa_expand_builtin_thread_pointer): Add hook function for
TARGET_EXPAND_BUILTIN_THREAD_POINTER.
(xtensa_expand_builtin_set_thread_pointer): Add hook function
for TARGET_EXPAND_BUILTIN_SET_THREAD_POINTER.
(xtensa_builtin): Remove XTENSA_BUILTIN_THREAD_POINTER and
XTENSA_BUILTIN_SET_THREAD_POINTER.
(xtensa_init_builtins): Remove __builtin_thread_pointer,
__builtin_set_thread_pointer machine-specific builtins.
(xtensa_fold_builtin): Remove XTENSA_BUILTIN_THREAD_POINTER,
XTENSA_BUILTIN_SET_THREAD_POINTER cases.
(xtensa_expand_builtin): Remove XTENSA_BUILTIN_THREAD_POINTER,
XTENSA_BUILTIN_SET_THREAD_POINTER cases.
Index: config/xtensa/xtensa.c
===
--- config/xtensa/xtensa.c  (revision 189431)
+++ config/xtensa/xtensa.c  (working copy)
@@ -158,6 +158,8 @@ static unsigned int xtensa_function_arg_boundary (
 static void xtensa_init_builtins (void);
 static tree xtensa_fold_builtin (tree, int, tree *, bool);
 static rtx xtensa_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+static rtx xtensa_expand_builtin_thread_pointer (rtx);
+static void xtensa_expand_builtin_set_thread_pointer (rtx);
 static void xtensa_va_start (tree, rtx);
 static bool xtensa_frame_pointer_required (void);
 static rtx xtensa_static_chain (const_tree, bool);
@@ -258,6 +260,13 @@ static const int reg_nonleaf_alloc_order[FIRST_PSE
 #undef  TARGET_EXPAND_BUILTIN
 #define TARGET_EXPAND_BUILTIN xtensa_expand_builtin
 
+#undef TARGET_EXPAND_BUILTIN_THREAD_POINTER
+#define TARGET_EXPAND_BUILTIN_THREAD_POINTER \
+  xtensa_expand_builtin_thread_pointer
+#undef TARGET_EXPAND_BUILTIN_SET_THREAD_POINTER
+#define TARGET_EXPAND_BUILTIN_SET_THREAD_POINTER \
+  xtensa_expand_builtin_set_thread_pointer
+
 #undef  TARGET_PREFERRED_RELOAD_CLASS
 #define TARGET_PREFERRED_RELOAD_CLASS xtensa_preferred_reload_class
 #undef  TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
@@ -3057,8 +3066,6 @@ xtensa_gimplify_va_arg_expr (tree valist, tree typ
 enum xtensa_builtin
 {
   XTENSA_BUILTIN_UMULSIDI3,
-  XTENSA_BUILTIN_THREAD_POINTER,
-  XTENSA_BUILTIN_SET_THREAD_POINTER,
   XTENSA_BUILTIN_max
 };
 
@@ -3077,23 +3084,6 @@ xtensa_init_builtins (void)
   "__umulsidi3", NULL_TREE);
   TREE_NOTHROW (decl) = 1;
   TREE_READONLY (decl) = 1;
-
-  if (TARGET_THREADPTR)
-{
-  ftype = build_function_type_list (ptr_type_node, NULL_TREE);
-  decl = add_builtin_function ("__builtin_thread_pointer", ftype,
-  XTENSA_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
-  NULL, NULL_TREE);
-  TREE_READONLY (decl) = 1;
-  TREE_NOTHROW (decl) = 1;
-
-  ftype = build_function_type_list (void_type_node, ptr_type_node,
-   NULL_TREE);
-  decl = add_builtin_function ("__builtin_set_thread_pointer", ftype,
-  XTENSA_BUILTIN_SET_THREAD_POINTER,
-  BUILT_IN_MD, NULL, NULL_TREE);
-  TREE_NOTHROW (decl) = 1;
-}
 }
 
 
@@ -3116,10 +3106,6 @@ xtensa_fold_builtin (tree fndecl, int n_args ATTRI
fold_convert (unsigned_intDI_type_node, arg1));
   break;
 
-case XTENSA_BUILTIN_THREAD_POINTER:
-case XTENSA_BUILTIN_SET_THREAD_POINTER:
-  break;
-
 default:
   internal_error ("bad builtin code");
   break;
@@ -3147,25 +3133,25 @@ xtensa_expand_builtin (tree exp, rtx target,
 implement it.  If not, just call the function.  */
   return expand_call (exp, target, ignore);
 
-case XTENSA_BUILTIN_THREAD_POINTER:
-  if (!target || !register_operand (target, Pmode))
-   target = gen_reg_rtx (Pmode);
-  emit_insn (gen_load_tp (target));
-  return target;
-
-case XTENSA_BUILTIN_SET_THREAD_POINTER:
-  arg = expand_normal (CALL_EXPR_ARG (exp, 0));
-  if (!register_operand (arg, Pmode))
-   arg = copy_to_mode_reg (Pmode, arg);
-  emit_insn (gen_set_tp (arg));
-  return const0_rtx;
-
 default:
   internal_error ("bad builtin code");
 }
   return NULL_RTX;
 }
 
+static rtx
+xtensa_expand_builtin_thread_pointer (rtx target)
+{
+  emit_insn (gen_load_tp (target));
+  return target;
+}
+
+static void
+xtensa_expand_builtin_set_thread_pointer (rtx val)
+{
+  emit_insn (gen_set_tp (val));
+}
+
 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.  */
 
 static reg_class_t


[PATCH 6/6] Thread pointer built-in functions, mips

2012-07-11 Thread Chung-Lin Tang
Finally, the part I personally need: the MIPS changes.

Thanks,
Chung-Lin

* config/mips/mips.c (mips_get_tp): Add 'target' parameter so that
the thread pointer can be generated into a specific register.
(mips_legitimize_tls_address): Update calls to mips_get_tp().
(mips_expand_builtin_thread_pointer): Add hook function for
TARGET_EXPAND_BUILTIN_THREAD_POINTER.
(TARGET_EXPAND_BUILTIN_THREAD_POINTER): Define.
Index: config/mips/mips.c
===
--- config/mips/mips.c  (revision 189431)
+++ config/mips/mips.c  (working copy)
@@ -2851,11 +2851,12 @@ mips_call_tls_get_addr (rtx sym, enum mips_symbol_
 /* Return a pseudo register that contains the current thread pointer.  */
 
 static rtx
-mips_get_tp (void)
+mips_get_tp (rtx target)
 {
-  rtx tp, fn;
+  rtx fn;
+  rtx tp = (target != NULL_RTX && REG_P (target)
+   ? target : gen_reg_rtx (Pmode));
 
-  tp = gen_reg_rtx (Pmode);
   if (TARGET_MIPS16)
 {
   mips_need_mips16_rdhwr_p = true;
@@ -2919,7 +2920,7 @@ mips_legitimize_tls_address (rtx loc)
   break;
 
 case TLS_MODEL_INITIAL_EXEC:
-  tp = mips_get_tp ();
+  tp = mips_get_tp (NULL_RTX);
   tmp1 = gen_reg_rtx (Pmode);
   tmp2 = mips_unspec_address (loc, SYMBOL_GOTTPREL);
   if (Pmode == DImode)
@@ -2931,7 +2932,7 @@ mips_legitimize_tls_address (rtx loc)
   break;
 
 case TLS_MODEL_LOCAL_EXEC:
-  tmp1 = mips_get_tp ();
+  tmp1 = mips_get_tp (NULL_RTX);
   offset = mips_unspec_address (loc, SYMBOL_TPREL);
   if (mips_split_p[SYMBOL_TPREL])
{
@@ -13884,6 +13885,13 @@ mips_expand_builtin (tree exp, rtx target, rtx sub
 }
   gcc_unreachable ();
 }
+
+static rtx
+mips_expand_builtin_thread_pointer (rtx target)
+{
+  return mips_get_tp (target);
+}
+
 
 /* An entry in the MIPS16 constant pool.  VALUE is the pool constant,
MODE is its mode, and LABEL is the CODE_LABEL associated with it.  */
@@ -17567,6 +17575,10 @@ mips_expand_vec_minmax (rtx target, rtx op0, rtx o
 #undef TARGET_EXPAND_BUILTIN
 #define TARGET_EXPAND_BUILTIN mips_expand_builtin
 
+#undef TARGET_EXPAND_BUILTIN_THREAD_POINTER
+#define TARGET_EXPAND_BUILTIN_THREAD_POINTER \
+  mips_expand_builtin_thread_pointer
+
 #undef TARGET_HAVE_TLS
 #define TARGET_HAVE_TLS HAVE_AS_TLS