[committed] Add var in push_fields_onto_fieldstack

2015-10-27 Thread Tom de Vries

Hi,

this patch adds a new variable in push_fields_onto_fieldstack, improving 
readability.


Bootstrapped and reg-tested on x86_64.

Committed to trunk as obvious.

Thanks,
- Tom
Add var in push_fields_onto_fieldstack

2015-10-27  Tom de Vries  

	* tree-ssa-structalias.c (push_fields_onto_fieldstack): Add and use var
	field_type.
---
 gcc/tree-ssa-structalias.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index 8d86dcb..9632ce1 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -5313,13 +5313,14 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack,
   {
 	bool push = false;
 	HOST_WIDE_INT foff = bitpos_of_field (field);
+	tree field_type = TREE_TYPE (field);
 
 	if (!var_can_have_subvars (field)
-	|| TREE_CODE (TREE_TYPE (field)) == QUAL_UNION_TYPE
-	|| TREE_CODE (TREE_TYPE (field)) == UNION_TYPE)
+	|| TREE_CODE (field_type) == QUAL_UNION_TYPE
+	|| TREE_CODE (field_type) == UNION_TYPE)
 	  push = true;
 	else if (!push_fields_onto_fieldstack
-		(TREE_TYPE (field), fieldstack, offset + foff)
+		(field_type, fieldstack, offset + foff)
 		 && (DECL_SIZE (field)
 		 && !integer_zerop (DECL_SIZE (field
 	  /* Empty structures may have actual size, like in C++.  So
@@ -5372,8 +5373,8 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack,
 		e.may_have_pointers = true;
 		e.only_restrict_pointers
 		  = (!has_unknown_size
-		 && POINTER_TYPE_P (TREE_TYPE (field))
-		 && TYPE_RESTRICT (TREE_TYPE (field)));
+		 && POINTER_TYPE_P (field_type)
+		 && TYPE_RESTRICT (field_type));
 		fieldstack->safe_push (e);
 	  }
 	  }
-- 
1.9.1



[PATCH, PR67742] Handle restrict struct fields recursively

2015-10-27 Thread Tom de Vries
[ was: Re: [PATCH, 1/2] Add handle_param parameter to 
create_variable_info_for_1 ]


On 26/10/15 17:23, Tom de Vries wrote:

 Why does create_variable_info_for_1 only handle
the single-field case?  That is, I expected this to be handled by
c_v_r_f_1
fully.



Yep, that's the goal of PR67742. I've written a patch in an earlier
version of the patch series that implements that, I'm currently porting
that patch to this patch series. I'll post asap.


This is the follow-up patch that handles restrict parameters 
recursively, also in struct fields.


Bootstrapped and reg-tested on x86_64.

OK for trunk?

Thanks,
- Tom
Handle restrict struct fields recursively

2015-10-26  Tom de Vries  

	* tree-ssa-structalias.c (struct fieldoff): Add restrict_var field.
	(push_fields_onto_fieldstack): Add and handle handle_param parameter.
	(create_variable_info_for_1): Add extra arg to call to
	push_fields_onto_fieldstack.  Add type_contains_placeholder_p test.
	Handle restrict pointer fields.
	(intra_create_variable_infos): Call create_variable_info_for_1 with
	handle_param == true.  Call make_restrict_var_constraints.

	* gcc.dg/tree-ssa/restrict-8.c: New test.
---
 gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c | 17 +++
 gcc/tree-ssa-structalias.c | 47 +-
 2 files changed, 56 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c b/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c
new file mode 100644
index 000..b0ab164
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-fre1" } */
+
+struct s
+{
+  int *__restrict__ *__restrict__ pp;
+};
+
+int
+f (struct s s, int *b)
+{
+  *b = 1;
+  **s.pp = 2;
+  return *b;
+}
+
+/* { dg-final { scan-tree-dump-times "return 1" 1 "fre1" } } */
diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index 344c3b2..2031175 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -399,6 +399,7 @@ new_var_info (tree t, const char *name)
   return ret;
 }
 
+static varinfo_t create_variable_info_for_1 (tree, const char *, bool);
 
 /* A map mapping call statements to per-stmt variables for uses
and clobbers specific to the call.  */
@@ -5196,6 +5197,8 @@ struct fieldoff
   unsigned may_have_pointers : 1;
 
   unsigned only_restrict_pointers : 1;
+
+  varinfo_t restrict_var;
 };
 typedef struct fieldoff fieldoff_s;
 
@@ -5290,11 +5293,12 @@ field_must_have_pointers (tree t)
OFFSET is used to keep track of the offset in this entire
structure, rather than just the immediately containing structure.
Returns false if the caller is supposed to handle the field we
-   recursed for.  */
+   recursed for.  If HANDLE_PARAM is set, we're handling part of a function
+   parameter.  */
 
 static bool
 push_fields_onto_fieldstack (tree type, vec *fieldstack,
-			 HOST_WIDE_INT offset)
+			 HOST_WIDE_INT offset, bool handle_param)
 {
   tree field;
   bool empty_p = true;
@@ -5320,7 +5324,7 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack,
 	|| TREE_CODE (field_type) == UNION_TYPE)
 	  push = true;
 	else if (!push_fields_onto_fieldstack
-		(field_type, fieldstack, offset + foff)
+		(field_type, fieldstack, offset + foff, handle_param)
 		 && (DECL_SIZE (field)
 		 && !integer_zerop (DECL_SIZE (field
 	  /* Empty structures may have actual size, like in C++.  So
@@ -5341,7 +5345,8 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack,
 	if (!pair
 		&& offset + foff != 0)
 	  {
-		fieldoff_s e = {0, offset + foff, false, false, false, false};
+		fieldoff_s e = {0, offset + foff, false, false, false, false,
+NULL};
 		pair = fieldstack->safe_push (e);
 	  }
 
@@ -5375,6 +5380,19 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack,
 		  = (!has_unknown_size
 		 && POINTER_TYPE_P (field_type)
 		 && TYPE_RESTRICT (field_type));
+		if (handle_param
+		&& e.only_restrict_pointers
+		&& !type_contains_placeholder_p (TREE_TYPE (field_type)))
+		  {
+		varinfo_t rvi;
+		tree heapvar = build_fake_var_decl (TREE_TYPE (field_type));
+		DECL_EXTERNAL (heapvar) = 1;
+		rvi = create_variable_info_for_1 (heapvar, "PARM_NOALIAS",
+		  true);
+		rvi->is_restrict_var = 1;
+		insert_vi_for_tree (heapvar, rvi);
+		e.restrict_var = rvi;
+		  }
 		fieldstack->safe_push (e);
 	  }
 	  }
@@ -5679,7 +5697,7 @@ create_variable_info_for_1 (tree decl, const char *name, bool handle_param)
   bool notokay = false;
   unsigned int i;
 
-  push_fields_onto_fieldstack (decl_type, &fieldstack, 0);
+  push_fields_onto_fieldstack (decl_type, &fieldstack, 0, handle_param);
 
   for (i = 0; !notokay && fieldstack.iterate (i, &fo); i++)
 	if (fo->has_unknown_size
@@ -5722,6 +5740,7 @@ create_variable_info_for_1 (tree decl, const char *

Re: [OpenACC 5/11] C++ FE changes

2015-10-27 Thread Jakub Jelinek
On Mon, Oct 26, 2015 at 03:35:20PM -0700, Cesar Philippidis wrote:
> I used that generic message for all of those clauses except for _GANG,
> _WORKER and _VECTOR. The gang clause, at the very least, needed it to
> disambiguate the static and num arguments. If you want I can handle
> _WORKER and _VECTOR with the generic message. I only included it because
> those arguments are optional, whereas they are mandatory for the other
> clauses.
> 
> Is this patch OK for trunk?

Ok.

Jakub


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-27 Thread Jakub Jelinek
On Mon, Oct 26, 2015 at 03:32:45PM -0700, Nathan Sidwell wrote:
> Richard, Jakub,
> this updates patch 1 to use the target-insns.def mechanism of detecting
> conditionally-implemented instructions.  Otherwise it's the same as
> yesterday's patch.  To recap:
> 
> 1) Moved the subcodes to an enumeration in internal-fn.h
> 
> 2) Remove ECF_LEAF
> 
> 3) Added check in initialize_ctrl_altering
> 
> 4) tracer code now (continues) to only look in last stmt of block
> 
> I looked at fnsplit and do not believe I need changes there.  That's
> changing things like:
>   if (cheap test)
> do cheap thing
>   else
> do complex thing
> 
> to break out the else part into a separate function.   That's fine -- it'll
> copy the whole CFG of interest.

The question is if some UNIQUE call could be ever considered as part of the
cheap test or do cheap thing.  If not, everything is fine of course for
fnsplit.

> ok?

Ok for me, but please wait for Richi's ack too.

Jakub


Re: [OpenACC 7/11] execution model

2015-10-27 Thread Jakub Jelinek
On Mon, Oct 26, 2015 at 04:11:20PM -0700, Nathan Sidwell wrote:
> Jakub, Richard,
> This is the updated version of patch 7, using target-insns.def for the new
> insns.  Otherwise same as yesterday's, which had the following changes:
> 
> The significant change is that now the head/tail unique markers are
> threaded on a data dependency variable.  I'd not  noticed its lack being a
> problem, but this is certainly more robust in showing the ordering
> dependency between calls.  The dependency var is the 2nd parameter, and all
> others are simply shifted along by one.
> 
> At RTL generation time the date dependency is exposed to the RTL expander,
> which in the PTX case simply does a src->dst move, which will eventually be
> deleted as unnecessary.
> 
> ok?

LGTM, though could I ask you to try to try to move the
struct oacc_collapse
expand_oacc_collapse_init
expand_oacc_collapse_vars
expand_oacc_for
additions somewhere else
(e.g. in between expand_omp_taskreg and expand_omp_for_init_counts),
because it seems patch just got too confused and gave up, so most of
expand_omp_for which I assume is unchanged except for
> +  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
> +{
> +  gcc_assert (!inner_stmt);
> +  expand_oacc_for (region, &fd);
> +}
addition is considered to be deleted in one place and added into another
one; if patch does this, I'd be afraid svn blame or git blame would do so
too, and thus lose history for expand_omp_for.  If moving it around doesn't
help, no big deal, but if it helps, it would be appreciated.

>   (is_oacc_parallel, is_oaccc_kernels): New.

One too many Cs.

Jakub


Re: Drop types_compatible_p from operand_equal_p

2015-10-27 Thread Richard Biener
On Mon, 26 Oct 2015, Jan Hubicka wrote:

> > On Sat, 24 Oct 2015, Jan Hubicka wrote:
> > 
> > > Hi, as discussed earlier, the types_compatible_p in operand_equal_p 
> > > seems redundant. (it is callers work to figure out how much of type 
> > > matching it wants.  If not, we probably want to treat most of other 
> > > references and casts simlar way).
> > > 
> > > Bootstrapped/regtested x86_64-linux. OK?
> > 
> > Ok.
> > 
> > Btw, you need to audit tree hashing for required changes with respect
> > to your ones to operand_equal_p.  operand_equal_p is the equality
> > function for it.
> 
> Yep, I am aware that tree hasing must match.  I think my changes are safe so
> far:
>  - ctors are already hashed resonably
>  - types are not hashed so the changes strenghtening OEP_ADDRESS_OF are safe
>  - OEP_ADDRESS_OF (so far) still boils down to syntactic matching. 
> 
> Looking at this I noticed that simple_cst_equal in tree.c seems to reimplement
> OEP_CONSTANT part of operand_equal_p and it seems to have bugs - i.e. not
> comparing index for CONSTRUCTOR ELTs.

simple_cst_equal is also quite incomplete and I'd rather have it die...

> Also I was always bit unsure how the path through operand_equal_p that allows
> different tree codes to match (stripping MEM_REF) and use of STRIP_NOPS 
> combine
> with add_expr that doesn't do these.  Do we have some mechanism that will
> prevent this from corrupting hashtables?

We should ensure that equal trees get equal hash codes (heh, probably
easy to add a wrapper around operand_equal_p double-checking that).

Richard.

> Honza
> > 
> > Thanks,
> > Richard.
> > 
> > >   * fold-const.c (operand_equal_p): Drop types_compatible_p when
> > >   comparing references.
> > > 
> > > Index: fold-const.c
> > > ===
> > > --- fold-const.c  (revision 229278)
> > > +++ fold-const.c  (working copy)
> > > @@ -2982,9 +2982,6 @@ operand_equal_p (const_tree arg0, const_
> > >  TYPE_SIZE (TREE_TYPE (arg1)),
> > >  flags)))
> > >   return 0;
> > > -   /* Verify that access happens in similar types.  */
> > > -   if (!types_compatible_p (TREE_TYPE (arg0), TREE_TYPE (arg1)))
> > > - return 0;
> > > /* Verify that accesses are TBAA compatible.  */
> > > if (flag_strict_aliasing
> > > && (!alias_ptr_types_compatible_p
> > > 
> > > 
> > 
> > -- 
> > Richard Biener 
> > SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
> > 21284 (AG Nuernberg)
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


Re: [PR c/64765, c/64880] Support OpenACC Combined Directives in C, C++

2015-10-27 Thread Thomas Schwinge
Hi!

On Thu, 8 Oct 2015 18:39:25 +0200, I wrote:
> Some bits extracted out of gomp-4_0-branch, and some other bits
> rewritten; here is a patch to support OpenACC Combined Directives in C,
> C++.  (The Fortran front end already does support these.)
> 
> As far as I know, Jakub is not available at this time, so maybe the C
> (Joseph) and C++ (Jason, Nathan) front end maintainers could please
> review this, instead of him?  (The front end changes as well as the few
> other cleanup changes should all be straight forward.)  OK for trunk once
> bootstrap tested?

Given the approval by Joseph and Nathan (thanks!), I have now committed
this, in r229404:

commit 2c4c872529d6e1051fb9cdd641ad5cc9fbc0e2cb
Author: tschwinge 
Date:   Tue Oct 27 08:39:15 2015 +

[PR c/64765, c/64880] Support OpenACC Combined Directives in C, C++

gcc/c-family/
PR c/64765
PR c/64880
* c-common.h (c_oacc_split_loop_clauses): Declare function.
* c-omp.c (c_oacc_split_loop_clauses): New function.
gcc/c/
PR c/64765
PR c/64880
* c-parser.c (c_parser_oacc_loop): Add mask, cclauses formal
parameters, and handle these.  Adjust all users.
(c_parser_oacc_kernels, c_parser_oacc_parallel): Merge functions
into...
(c_parser_oacc_kernels_parallel): ... this new function.  Adjust
all users.
* c-tree.h (c_finish_oacc_parallel, c_finish_oacc_kernels): Don't
declare functions.
(c_finish_omp_construct): Declare function.
* c-typeck.c (c_finish_oacc_parallel, c_finish_oacc_kernels):
Merge functions into...
(c_finish_omp_construct): ... this new function.
gcc/cp/
PR c/64765
PR c/64880
* cp-tree.h (finish_oacc_kernels, finish_oacc_parallel): Don't
declare functions.
(finish_omp_construct): Declare function.
* parser.c (cp_parser_oacc_loop): Add p_name, mask, cclauses
formal parameters, and handle these.  Adjust all users.
(cp_parser_oacc_kernels, cp_parser_oacc_parallel): Merge functions
into...
(cp_parser_oacc_kernels_parallel): ... this new function.  Adjust
all users.
* semantics.c (finish_oacc_kernels, finish_oacc_parallel): Merge 
functions into...
(finish_omp_construct): ... this new function.
gcc/
* tree.h (OACC_PARALLEL_BODY, OACC_PARALLEL_CLAUSES)
(OACC_KERNELS_BODY, OACC_KERNELS_CLAUSES, OACC_KERNELS_COMBINED)
(OACC_PARALLEL_COMBINED): Don't define macros.  Adjust all users.
gcc/testsuite/
PR c/64765
PR c/64880
* c-c++-common/goacc/loop-1.c: Don't skip for C++.  Don't prune
sorry message.
(PR64765): New function.
* gfortran.dg/goacc/coarray_2.f90: XFAIL.
* gfortran.dg/goacc/combined_loop.f90: Extend.  Don't prune
sorry message.
* gfortran.dg/goacc/cray.f95: Refine prune directive.
* gfortran.dg/goacc/parameter.f95: Likewise.
libgomp/
* testsuite/libgomp.oacc-c-c++-common/combdir-1.c: New file.
* testsuite/libgomp.oacc-fortran/combdir-1.f90: Likewise.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@229404 
138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog  |   6 +
 gcc/c-family/ChangeLog |   9 ++
 gcc/c-family/c-common.h|   1 +
 gcc/c-family/c-omp.c   |  40 +-
 gcc/c/ChangeLog|  19 +++
 gcc/c/c-parser.c   | 148 +---
 gcc/c/c-tree.h |   3 +-
 gcc/c/c-typeck.c   |  36 ++---
 gcc/cp/ChangeLog   |  18 +++
 gcc/cp/cp-tree.h   |   3 +-
 gcc/cp/parser.c| 149 -
 gcc/cp/semantics.c |  54 +++-
 gcc/fortran/trans-openmp.c |   4 -
 gcc/gimplify.c |  16 +--
 gcc/testsuite/ChangeLog|  13 ++
 gcc/testsuite/c-c++-common/goacc/loop-1.c  |  10 +-
 gcc/testsuite/gfortran.dg/goacc/coarray_2.f90  |   1 +
 gcc/testsuite/gfortran.dg/goacc/combined_loop.f90  |   9 +-
 gcc/testsuite/gfortran.dg/goacc/cray.f95   |   2 +-
 gcc/testsuite/gfortran.dg/goacc/parameter.f95  |   2 +-
 gcc/tree-pretty-print.c|  11 +-
 gcc/tree.def   |   8 +-
 gcc/tree.h |  20 ---
 libgomp/ChangeLog  |   5 +
 .../libgomp.oacc-c-c++-common/combdir-1.c  |  52 +++
 .../testsuite/libgomp.oacc-fortran/combdir-1.f90   |  37 +
 26 files changed, 407 insertions(+), 269 deletions(-)

diff --gi

[PATCH] Remove push/pop_cfun around cgraph::release_function_body

2015-10-27 Thread Richard Biener

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2015-10-27  Richard Biener  

* cfg.c (free_edge): Add function argument and use it instead of cfun.
(clear_edges): Likewise.
* cfg.h (clear_edges): Adjust prototype.
* cfgexpand.c (pass_expand::execute): Adjust.
* cfgloop.c (release_recorded_exits): Add function argument and use
it instead of cfun.
* cfgloop.h (release_recorded_exits): Adjust prototype.
(loops_state_satisfies_p): Add overload with function argument.
(loops_state_set): Likewise.
(loops_state_clear): Likewise.
(struct loop_iterator): Add function argument to constructor
and iterator and use it instead of cfun.
(FOR_EACH_LOOP_FN): New macro.
(loop_optimizer_finalize): Add overload with function argument.
* loop-init.c (loop_optimizer_init): Adjust.
(fix_loop_structure): Likewise.
(loop_optimizer_finaliz): Add function argument and use it
instead of cfun.
* tree-cfg.c (delete_tree_cfg_annotations): Likewise.
* tree-cfg.h (delete_tree_cfg_annotations): Adjust prototype.
* cgraph.c (release_function_body): Do not push/pop cfun.
* final.c (rest_of_clean_state): Adjust.
* graphite.c (graphite_finalize): Likewise.
* tree-ssa-copy.c (fini_copy_prop): Likewise.
* tree-ssa-dce.c (perform_tree_ssa_dce): Likewise.
* tree-ssa-loop-ivcanon.c (canonicalize_induction_variables): Likewise.
(tree_unroll_loops_completely): Likewise.
(pass_complete_unrolli::execute): Likewise.
* tree-ssa-loop-niter.c (free_numbers_of_iterations_estimates):
Add function argument and use it instead of cfun.
* tree-ssa-loop-niter.h (free_numbers_of_iterations_estimates):
Adjust prototype.
* tree-ssa-loop.c (tree_ssa_loop_done): Adjust.
* tree-ssa.c (delete_tree_ssa): Add function argument and use it
instead of cfun.
* tree-ssa.h (delete_tree_ssa): Adjust prototype.
* tree-ssanames.c (fini_ssanames): Add function argument and use it
instead of cfun.
* tree-ssanames.c (fini_ssanames): Adjust prototype.
* tree-vrp.c (execute_vrp): Adjust.
* value-prof.c (free_histograms): Add function argument and use it
instead of cfun.
* value-prof.h (free_histograms): Adjust prototype.

Index: trunk/gcc/cfg.c
===
--- trunk.orig/gcc/cfg.c2015-10-26 15:27:42.150921714 +0100
+++ trunk/gcc/cfg.c 2015-10-26 15:32:23.679086458 +0100
@@ -88,35 +88,35 @@ init_flow (struct function *the_fun)
without actually removing it from the pred/succ arrays.  */
 
 static void
-free_edge (edge e)
+free_edge (function *fn, edge e)
 {
-  n_edges_for_fn (cfun)--;
+  n_edges_for_fn (fn)--;
   ggc_free (e);
 }
 
 /* Free the memory associated with the edge structures.  */
 
 void
-clear_edges (void)
+clear_edges (struct function *fn)
 {
   basic_block bb;
   edge e;
   edge_iterator ei;
 
-  FOR_EACH_BB_FN (bb, cfun)
+  FOR_EACH_BB_FN (bb, fn)
 {
   FOR_EACH_EDGE (e, ei, bb->succs)
-   free_edge (e);
+   free_edge (fn, e);
   vec_safe_truncate (bb->succs, 0);
   vec_safe_truncate (bb->preds, 0);
 }
 
-  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
-free_edge (e);
-  vec_safe_truncate (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds, 0);
-  vec_safe_truncate (ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs, 0);
+  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (fn)->succs)
+free_edge (fn, e);
+  vec_safe_truncate (EXIT_BLOCK_PTR_FOR_FN (fn)->preds, 0);
+  vec_safe_truncate (ENTRY_BLOCK_PTR_FOR_FN (fn)->succs, 0);
 
-  gcc_assert (!n_edges_for_fn (cfun));
+  gcc_assert (!n_edges_for_fn (fn));
 }
 
 /* Allocate memory for basic_block.  */
@@ -350,7 +350,7 @@ remove_edge_raw (edge e)
   disconnect_src (e);
   disconnect_dest (e);
 
-  free_edge (e);
+  free_edge (cfun, e);
 }
 
 /* Redirect an edge's successor from one block to another.  */
Index: trunk/gcc/cfg.h
===
--- trunk.orig/gcc/cfg.h2015-10-26 15:27:42.150921714 +0100
+++ trunk/gcc/cfg.h 2015-10-26 15:30:14.536634739 +0100
@@ -74,8 +74,8 @@ struct GTY(()) control_flow_graph {
 };
 
 
-extern void init_flow (struct function *);
-extern void clear_edges (void);
+extern void init_flow (function *);
+extern void clear_edges (function *);
 extern basic_block alloc_block (void);
 extern void link_block (basic_block, basic_block);
 extern void unlink_block (basic_block);
Index: trunk/gcc/cfgexpand.c
===
--- trunk.orig/gcc/cfgexpand.c  2015-10-26 15:27:42.150921714 +0100
+++ trunk/gcc/cfgexpand.c   2015-10-26 15:30:14.538634762 +0100
@@ -6307,7 +6307,7 @@ pass_expand::execute (function *fun)
   /* Free stuff we

Re: [PR 67794] Also remap SSA_NAMEs defined in ASMs in IPA-SRA

2015-10-27 Thread Christophe Lyon
On 9 October 2015 at 09:46, Richard Biener  wrote:
> On Thu, 8 Oct 2015, Martin Jambor wrote:
>
>> Hi,
>>
>> the following fixes PR 67794 by properly remapping SSA_NAMEs which are
>> based on PARM_DECLs which are about to be removed as unnecessary.  And
>> by "properly" I mean also when they are defined by a GIMPL_ASM
>> statement.  In fact, it switches to using an iterator over definitions
>> to make sure it always handles everything...  well,except for PHIs
>> which are still handled specially because, from a quick glance over
>> their source, it seemed to me that the iterator does not support them.
>>
>> Bootstrapped and tested on x86_64-linux.  OK for trunk?
>> The issue is most probably latent on a number of old branches, do we
>> want to backport the patch to any of them?
>>
>> Thanks,
>>
>> Martin
>>
>>
>> 2015-10-08  Martin Jambor  
>>
>>   tree-optimization/67794
>>   * tree-sra.c (replace_removed_params_ssa_names): Do not distinguish
>>   between types of state,ents but accept original definitions as a
>>   parameter.
>>   (ipa_sra_modify_function_body): Use FOR_EACH_SSA_DEF_OPERAND to
>>   iterate over definitions.
>>
>> testsuite/
>> * gcc.dg/ipa/ipa-sra-10.c: Nw test.
>> * gcc.dg/torture/pr67794.c: Likewise.
>>
>> diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-sra-10.c 
>> b/gcc/testsuite/gcc.dg/ipa/ipa-sra-10.c
>> new file mode 100644
>> index 000..24b64d1
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/ipa/ipa-sra-10.c
>> @@ -0,0 +1,34 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fipa-sra -fdump-tree-eipa_sra-details"  } */
>> +
>> +extern void consume (int);
>> +extern int glob, glob1, glob2;
>> +extern int get (void);
>> +
>> +
>> +static void __attribute__ ((noinline))
>> +foo (int a)
>> +{
>> +  a = glob;
>> +  consume (a);
>> +  a = get ();
>> +  consume (a);
>> +  __asm__ volatile("" : : ""(a));
>> +  consume (a);
>> +
>> +  if (glob1)
>> +a = glob1;
>> +  else
>> +a = glob2;
>> +  consume (a);
>> +}
>> +
>> +int
>> +bar (int a)
>> +{
>> +  foo (a);
>> +  glob = a;
>> +  return 0;
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "replacing an SSA name of a removed 
>> param" 4 "eipa_sra" } } */
>> diff --git a/gcc/testsuite/gcc.dg/torture/pr67794.c 
>> b/gcc/testsuite/gcc.dg/torture/pr67794.c
>> new file mode 100644
>> index 000..5489e56
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/torture/pr67794.c
>> @@ -0,0 +1,15 @@
>> +/* { dg-do compile } */
>> +
>> +int *b;
>> +static void fn1(int *best, int *dmin) {
>> +  int a[64];
>> +  dmin = a;
>> +  __asm__ volatile("" : "+&r"(dmin) : ""(best));
>> +}
>> +
>> +__attribute__((always_inline)) static inline void fn2(int *best) { 
>> fn1(best, b); }
>> +
>> +void fn3(void) {
>> +  int c[1];
>> +  fn2(c);
>> +}
>> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
>> index 4327990..f2a4e72 100644
>> --- a/gcc/tree-sra.c
>> +++ b/gcc/tree-sra.c
>> @@ -4612,61 +4612,45 @@ get_adjustment_for_base (ipa_parm_adjustment_vec 
>> adjustments, tree base)
>>return NULL;
>>  }
>>
>> -/* If the statement STMT defines an SSA_NAME of a parameter which is to be
>> -   removed because its value is not used, replace the SSA_NAME with a one
>> -   relating to a created VAR_DECL together all of its uses and return true.
>> -   ADJUSTMENTS is a pointer to an adjustments vector.  */
>> +/* If OLD_NAME, which is being defined by statement STMT, is an SSA_NAME of 
>> a
>> +   parameter which is to be removed because its value is not used, create a 
>> new
>> +   SSA_NAME relating to a replacement VAR_DECL, replace all uses of the
>> +   original with it and return it.  If there is no need to re-map, return 
>> true.
>> +   ADJUSTMENTS is a pointer to a vector of IPA-SRA adjustments.  */
>
> The docs still say we return a bool
>
> Otherwise the patch looks ok to me.  I think we want to backport it
> to branche(s) after a while.
>
Hi Martin,

After your backport in the gcc-5 branch, I see build failures:
/tmp/2849532_27.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/tree-sra.c:
In function ‘tree_node* replace_removed_params_ssa_names(tree_node*,
gimple_statement_base**, ipa_parm_adjustment_vec)’:
/tmp/2849532_27.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/tree-sra.c:4609:
error: cannot convert ‘gimple_statement_base**’ to
‘gimple_statement_base*’ for argument ‘2’ to ‘tree_node*
make_ssa_name(tree_node*, gimple_statement_base*)’
/tmp/2849532_27.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/tree-sra.c:
In function ‘bool
ipa_sra_modify_function_body(ipa_parm_adjustment_vec)’:
/tmp/2849532_27.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/tree-sra.c:4703:
error: cannot convert ‘gphi*’ to ‘gimple_statement_base**’ for
argument ‘2’ to ‘tree_node*
replace_removed_params_ssa_names(tree_node*, gimple_statement_base**,
ipa_parm_adjustment_vec)’
/tmp/2849532_27.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/tree-sra.c:4772:
error: cannot convert ‘gimple_statement_base*’ to
‘gimple_statement_base**’ for argument ‘2’ to

[RFC, PATCH v2] Disable -fprofile-use related optimizations if corresponding .gcda file not found.

2015-10-27 Thread Maxim Ostapenko

Hi!

As was pointed out in previous thread 
(https://gcc.gnu.org/ml/gcc-patches/2015-10/msg00723.html), sometimes 
PGO-built binaries can actually introduce performance regressions. We 
could identify affected object files and disable PGO for them by simply 
removing corresponding .gcda file.

My previous patch was incomplete and had two major drawbacks:

* It disabled unrelated options (e.g. -frename-registers) and 
PGO-related options, set up by user explicitly, if corresponding .gcda 
file is not found.
* As Markus pointed out, in many cases we actually don't want to disable 
PGO-related options even if .gcda file is missing.


This patch tries to solve these issues in the following way:

* Introduce new -fprofile-use-partial switch.
* If -fprofile-use-partial is ON, try to find corresponding .gcda file 
in the very early stage during compiler invoking (actually, when common 
command line options are parsed). If .gcda file exists, enable 
PGO-related optimizations and emit warning otherwise. I believe this 
should not break existing code.


Regtested and bootstrapped on x86_64-unknown-linux-gnu.

Does the patch look sensible?

Thanks,
-Maxim
gcc/ChangeLog:

2015-10-27  Maxim Ostapenko  

	* common.opt (profile_file_name): New variable.
	(fprofile-use-partial): Likewise.
	(fprofile-use-partial=): Likewise.
	* opts.c: Include gcov-io.h.
	(common_handle_option): Defer enabling PGO-related optimizations until
	we know if corresponding .gcda file exists.
	(maybe_setup_aux_base_name): New function.
	(setup_coverage_filename): Likewise.
	(enable_fdo_optimizations): Move up in source file.
	(finish_options): Call maybe_setup_aux_base_name and setup coverage
	filename. Enable PGO-related optimizations if corresponding .gcda file
	exists if -fprofile-use-partial is used.  If -fprofile-use is used,
	enable PGO-related optimizations without any other conditions.
	* coverage.c (coverage_init): Adjust to use profile_file_name.

diff --git a/gcc/common.opt b/gcc/common.opt
index 12ca0d6..fb04201 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -198,6 +198,9 @@ const char *main_input_basename
 Variable
 int main_input_baselength
 
+Variable
+char *profile_file_name
+
 ; Which options have been printed by --help.
 Variable
 char *help_printed
@@ -1861,6 +1864,14 @@ fprofile-use=
 Common Joined RejectNegative
 Enable common options for performing profile feedback directed optimizations, and set -fprofile-dir=.
 
+fprofile-use-partial
+Common Var(flag_profile_use_partial)
+Enable common options for performing profile feedback directed optimizations.
+
+fprofile-use-partial=
+Common Joined RejectNegative
+Enable common options for performing profile feedback directed optimizations, and set -fprofile-dir=.
+
 fprofile-values
 Common Report Var(flag_profile_values)
 Insert code to profile values of expressions.
diff --git a/gcc/coverage.c b/gcc/coverage.c
index 4e08e5f..461dd48 100644
--- a/gcc/coverage.c
+++ b/gcc/coverage.c
@@ -1184,7 +1184,7 @@ void
 coverage_init (const char *filename)
 {
   int len = strlen (filename);
-  int prefix_len = 0;
+  da_file_name = profile_file_name;
 
   /* Since coverage_init is invoked very early, before the pass
  manager, we need to set up the dumping explicitly. This is
@@ -1193,24 +1193,6 @@ coverage_init (const char *filename)
 g->get_passes ()->get_pass_profile ()->static_pass_number;
   g->get_dumps ()->dump_start (profile_pass_num, NULL);
 
-  if (!profile_data_prefix && !IS_ABSOLUTE_PATH (filename))
-profile_data_prefix = getpwd ();
-
-  if (profile_data_prefix)
-prefix_len = strlen (profile_data_prefix);
-
-  /* Name of da file.  */
-  da_file_name = XNEWVEC (char, len + strlen (GCOV_DATA_SUFFIX)
-			  + prefix_len + 2);
-
-  if (profile_data_prefix)
-{
-  memcpy (da_file_name, profile_data_prefix, prefix_len);
-  da_file_name[prefix_len++] = '/';
-}
-  memcpy (da_file_name + prefix_len, filename, len);
-  strcpy (da_file_name + prefix_len + len, GCOV_DATA_SUFFIX);
-
   bbg_file_stamp = local_tick;
   
   if (flag_auto_profile)
diff --git a/gcc/opts.c b/gcc/opts.c
index 9a3fbb3..2e0cfa4 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "opts-diagnostic.h"
 #include "insn-attr-common.h"
 #include "common/common-target.h"
+#include "gcov-io.h"
 
 static void set_Wstrict_aliasing (struct gcc_options *opts, int onoff);
 
@@ -660,6 +661,97 @@ default_options_optimization (struct gcc_options *opts,
 			 lang_mask, handlers, loc, dc);
 }
 
+/* Enable FDO-related flags.  */
+
+static void
+enable_fdo_optimizations (struct gcc_options *opts,
+			  struct gcc_options *opts_set,
+			  int value)
+{
+  if (!opts_set->x_flag_branch_probabilities)
+opts->x_flag_branch_probabilities = value;
+  if (!opts_set->x_flag_profile_values)
+opts->x_flag_profile_values = value;
+  if (!opts_set->x_flag_unroll_loops)
+opts->x_flag_unroll_loops = value;
+  if (!opts_set->x_flag_peel_loops)

Re: [testsuite, committed, PR68063] Add missing private clause in libgomp.c++/member-2.C

2015-10-27 Thread Thomas Schwinge
Hi Jakub!

On Fri, 23 Oct 2015 12:48:57 +0200, Tom de Vries  wrote:
> this patch adds a missing private clause in libgomp.c++/member-2.C (as 
> you suggested in the PR).
> 
> This allows the test to succeed consistently.

I'm seeing occasional (very rarely) failure of libgomp.c++/member-1.C
(without anything in libgomp.log, unfortunately).  I have not made an
attempt to properly understand what it's doing, but this:

[...]
   150#pragma omp parallel private (f)
   151  {
   152f = false;
   153  #pragma omp single
   154  #pragma omp taskloop lastprivate (a, t, b, n)
   155for (int i = 0; i < 30; i++)
   156  {
   157int q = omp_get_thread_num ();
   158if (f && (a != 7 * q || b != 8 * q || r != 9 * q || t != 
10 * q))
   159  __builtin_abort ();
   160take (a, r, t, b);
   161A::a = 7 * q;
   162A::b = 8 * q;
   163R::r = 9 * q;
[...]

... looks a bit as if it might need to get the same patch applied that
Tom has applied to libgomp.c++/member-2.C:

> --- a/libgomp/testsuite/libgomp.c++/member-2.C
> +++ b/libgomp/testsuite/libgomp.c++/member-2.C
> @@ -154,7 +154,7 @@ A::m1 ()
>  {
>f = false;
>  #pragma omp single
> -#pragma omp taskloop lastprivate (a, T::t, b, n)
> +#pragma omp taskloop lastprivate (a, T::t, b, n) private (R::r)
>for (int i = 0; i < 30; i++)
>   {
> int q = omp_get_thread_num ();

Could you please review that, and while at it, could there possibly be
any other races of r or R::r, or other objects?


Grüße
 Thomas


signature.asc
Description: PGP signature


Fold comparisons between sqrt and zero

2015-10-27 Thread Richard Sandiford
Richard Biener  writes:
> On Mon, Oct 26, 2015 at 10:36 AM, Richard Sandiford
>  wrote:
>> An upcoming patch adds a fold from hypot(x,x) to fabs(x)*sqrt(2).
>> This is unusual in that it could trigger in the gimplifier but would
>> require new SSA names to be created.  This patch makes sure that we
>> don't try to create new SSA names when we're not yet in SSA form.
>>
>> Tested on x86_64-linux-gnu, aarch64-linux-gnu and arm-linux-gnueabi.
>> OK to install?
>
> Please use
>
>  if (gimple_in_ssa_p (cfun))
>   res = make_ssa_name (type);
>  else
>   res = create_tmp_reg (type);
>
> Ok with that change.

Thanks, committed in that form.  I retested the series on top of it and
the create_tmp_reg causes the later signbit patch (not yet committed)
to regress on:

  signbit(sqrt(x))

which is always 0 for -ffast-math.  The signbit fold first converts it to:

  sqrt(x) < 0

and whether we realise that this is false depends on a race between two
folders: the sqrt comparison folder, which wants to convert it to:

  x < 0*0

and the generic tree_expr_nonnegative_p rule for ... < 0, which would
give the hoped-for 0.

The sqrt code already handles comparisons with negative values specially,
so this patch simply extends that idea to comparisons with zero.

I don't really like patches like this since they'll probably help no
real code, but still...

Tested on x86_64-linux-gnu, aarch64-linux-gnu and arm-linux-gnueabi.
OK to install?

Thanks,
Richard


gcc/
* match.pd: Handle sqrt(x) cmp 0 specially.

gcc/testsuite/
* gcc.dg/torture/builtin-sqrt-cmp-1.c: New test.

diff --git a/gcc/match.pd b/gcc/match.pd
index 26491d2..b8e6b46 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1973,6 +1973,25 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
{ constant_boolean_node (true, type); })
/* sqrt(x) > y is the same as x >= 0, if y is negative.  */
(ge @0 { build_real (TREE_TYPE (@0), dconst0); })))
+ (if (real_equal (TREE_REAL_CST_PTR (@1), &dconst0))
+  (switch
+   /* sqrt(x) < 0 is always false.  */
+   (if (cmp == LT_EXPR)
+   { constant_boolean_node (false, type); })
+   /* sqrt(x) >= 0 is always true if we don't care about NaNs.  */
+   (if (cmp == GE_EXPR && !HONOR_NANS (@0))
+   { constant_boolean_node (true, type); })
+   /* sqrt(x) <= 0 -> x == 0.  */
+   (if (cmp == LE_EXPR)
+   (eq @0 @1))
+   /* Otherwise sqrt(x) cmp 0 -> x cmp 0.  Here cmp can be >=, >,
+  == or !=.  In the last case:
+
+   (sqrt(x) != 0) == (NaN != 0) == true == (x != 0)
+
+ if x is negative or NaN.  Due to -funsafe-math-optimizations,
+ the results for other x follow from natural arithmetic.  */
+   (cmp @0 @1)))
  (if (cmp == GT_EXPR || cmp == GE_EXPR)
   (with
{
diff --git a/gcc/testsuite/gcc.dg/torture/builtin-sqrt-cmp-1.c 
b/gcc/testsuite/gcc.dg/torture/builtin-sqrt-cmp-1.c
new file mode 100644
index 000..3f4a708
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/builtin-sqrt-cmp-1.c
@@ -0,0 +1,53 @@
+/* { dg-do link } */
+/* { dg-options "-ffast-math" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
+
+extern double sqrt (double);
+extern float sqrtf (float);
+extern long double sqrtl (long double);
+
+/* All references to link_error should go away at compile-time.  */
+extern void link_error (void);
+
+#define TEST_ONE(SUFFIX, TYPE) \
+  void __attribute__ ((noinline, noclone)) \
+  test##SUFFIX (TYPE f, int *res)  \
+  {\
+TYPE sqrt_res = sqrt##SUFFIX (f);  \
+res[0] = sqrt_res < 0; \
+if (res[0])\
+  link_error ();   \
+res[1] = sqrt_res <= 0;\
+if (res[1] != (f == 0))\
+  link_error ();   \
+res[2] = (sqrt_res == 0);  \
+if (res[2] != (f == 0))\
+  link_error ();   \
+res[3] = (sqrt_res != 0);  \
+if (res[3] != (f != 0))\
+  link_error ();   \
+res[4] = (sqrt_res > 0);   \
+if (res[4] != (f > 0)) \
+  link_error ();   \
+res[5] = (sqrt_res >= 0);  \
+if (!res[5])   \
+  link_error ();   \
+  }
+
+volatile float f;
+volatile double d;
+volatile long double ld;
+
+TEST_ONE (f, float)
+TEST_ONE (, double)
+TEST_ONE (l, long double)
+
+int
+main ()
+{
+  int res[6];
+  testf (f, res);
+  test (d, res);
+  testl (ld, res);
+  return 0;
+}



Re: [testsuite, committed, PR68063] Add missing private clause in libgomp.c++/member-2.C

2015-10-27 Thread Jakub Jelinek
On Tue, Oct 27, 2015 at 10:15:11AM +0100, Thomas Schwinge wrote:
> ... looks a bit as if it might need to get the same patch applied that
> Tom has applied to libgomp.c++/member-2.C:

You're right, member-2.C is a templatized version of member-1.C,
a change to add private (R::r) to that corresponding member-1.C taskloop
directive is preapproved.
The other member-*.C testcases don't have this issue.

Jakub


[Patch, Contrib] Download ISL 0.15 by download_prerequisites

2015-10-27 Thread Tobias Burnus
Hi all,

recently, support for ISL 0.15 was added to GCC and also
ftp://gcc.gnu.org/pub/gcc/infrastructure/ now contains ISL 0.15.

Hence, there is no reason not to download the newest version by
download_prerequisites.

OK for the trunk? (One could also add it to GCC 5 as the ISL 0.15
patches landed there as well on 2015-10-12.)


Side remark: I think one could could consider to also put newer versions
of the other prerequisites on the FTP server, which currently has quite
old versions:
- GMP:  4.3.2 of January 2010 - current is 6.0.0a of March 2014 (6.1RC is Oct 
2015)
- MPFR: 2.4.2 of mid 2009 - current is 3.1.3  of June 2015 (the web page 
has additionally 3 post-relase bug-fix patches)
- MPC:  0.8.1 of end of 2009  - current is 1.0.2  of February 2015

Cheers,

Tobias


contrib/
* download_prerequisites: Download ISL 0.15.

diff --git a/contrib/download_prerequisites b/contrib/download_prerequisites
index 6940330..a685a1d 100755
--- a/contrib/download_prerequisites
+++ b/contrib/download_prerequisites
@@ -48,7 +48,7 @@ ln -sf $MPC mpc || exit 1
 
 # Necessary to build GCC with the Graphite loop optimizations.
 if [ "$GRAPHITE_LOOP_OPT" = "yes" ] ; then
-  ISL=isl-0.14
+  ISL=isl-0.15
 
   wget ftp://gcc.gnu.org/pub/gcc/infrastructure/$ISL.tar.bz2 || exit 1
   tar xjf $ISL.tar.bz2  || exit 1



Re: [testsuite, committed, PR68063] Add missing private clause in libgomp.c++/member-2.C

2015-10-27 Thread Thomas Schwinge
Hi!

On Tue, 27 Oct 2015 11:08:15 +0100, Jakub Jelinek  wrote:
> On Tue, Oct 27, 2015 at 10:15:11AM +0100, Thomas Schwinge wrote:
> > ... looks a bit as if it might need to get the same patch applied that
> > Tom has applied to libgomp.c++/member-2.C:
> 
> You're right, member-2.C is a templatized version of member-1.C,
> a change to add private (R::r) to that corresponding member-1.C taskloop
> directive is preapproved.

Tested, and committed in r229411:

commit 540c8b2a12162472a4f644b7b533041a813a0332
Author: tschwinge 
Date:   Tue Oct 27 10:32:32 2015 +

[PR testsuite/68063] Add missing private clause in libgomp.c++/member-1.C

PR testsuite/68063
* testsuite/libgomp.c++/member-1.C (A::m1): Add missing private clause.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@229411 
138bc75d-0d04-0410-961f-82ee72b054a4
---
 libgomp/ChangeLog| 5 +
 libgomp/testsuite/libgomp.c++/member-1.C | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git libgomp/ChangeLog libgomp/ChangeLog
index ca34af8..0194503 100644
--- libgomp/ChangeLog
+++ libgomp/ChangeLog
@@ -1,3 +1,8 @@
+2015-10-27  Thomas Schwinge  
+
+   PR testsuite/68063
+   * testsuite/libgomp.c++/member-1.C (A::m1): Add missing private clause.
+
 2015-10-27  James Norris  
 
* testsuite/libgomp.oacc-c-c++-common/combdir-1.c: New file.
diff --git libgomp/testsuite/libgomp.c++/member-1.C 
libgomp/testsuite/libgomp.c++/member-1.C
index d2d0c5b..c7c1ba4 100644
--- libgomp/testsuite/libgomp.c++/member-1.C
+++ libgomp/testsuite/libgomp.c++/member-1.C
@@ -151,7 +151,7 @@ A::m1 ()
 {
   f = false;
 #pragma omp single
-#pragma omp taskloop lastprivate (a, t, b, n)
+#pragma omp taskloop lastprivate (a, t, b, n) private (R::r)
   for (int i = 0; i < 30; i++)
{
  int q = omp_get_thread_num ();


Grüße
 Thomas


signature.asc
Description: PGP signature


Re: [PATCH][AArch64] Enable autoprefetcher modelling in the scheduler

2015-10-27 Thread James Greenhalgh
On Thu, Oct 22, 2015 at 05:05:26PM +0100, Kyrill Tkachov wrote:
> Hi all,
> 
> This patch enables the autoprefetcher heuristic for scheduling in AArch64.
> It is enabled for the Cortex-A53, Cortex-A57 cores and is off for the other 
> cores,
> leaving their behaviour unchanged.
> 
> When enabled, the scheduler will try to sort groups of loads or stores in
> order of the offset from a common base register.
> 
> From what I understand of the relevant scheduling hooks, there are
> essentially three levels of this:
> 1) Don't use the autoprefetcher heuristic
> 2) Use it to order loads/stores but allow other scheduling heuristics to
> reorder them again to maximise multi-issue opportunities
> 3) Use it to order loads/stores and keep that order, even if it can harm
> multi-issue opportunities.
> 
> With this patch I get a 0.4% improvement in SPECINT 2006 and 1.7% improvement
> in SPECFP 2006 on a Cortex-A57 as well as improvements in various streaming
> workloads.
> 
> On Cortex-A53 I see improvements to various streaming workloads and there's
> no regressions or improvements on SPEC2000.
> 
> Bootstrapped and tested on aarch64-none-linux-gnu.
> 
> Ok for trunk?

OK.

Thanks,
James

> 2015-10-22  Kyrylo Tkachov  
> 
> * config/aarch64/aarch64-protos.h
> (struct tune_params): Add autoprefetcher_model field.
> * config/aarch64/aarch64.c: Include params.h
> (generic_tunings): Specify autoprefetcher_model value.
> (cortexa53_tunings): Likewise.
> (cortexa57_tunings): Likewise.
> (cortexa72_tunings): Likewise.
> (thunderx_tunings): Likewise.
> (xgene1_tunings): Likewise.
> (aarch64_first_cycle_multipass_dfa_lookahead_guard): New function.
> (TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD): Define.
> (aarch64_override_options_internal): Set
> PARAM_SCHED_AUTOPREF_QUEUE_DEPTH param.



Re: [PATCH] Use subreg_regno instead of subreg_regno_offset

2015-10-27 Thread Bernd Schmidt

On 10/26/2015 11:46 PM, Anatoliy Sokolov wrote:

   This patch change code 'REGNO (subreg) + subreg_regno_offset (...)'
with subreg_regno (subreg).


The patch has whitespace damage that makes it difficult to apply. Please 
use text/plain attachments.



Index: gcc/reg-stack.c
===
--- gcc/reg-stack.c(revision 229083)
+++ gcc/reg-stack.c(working copy)
@@ -416,11 +416,7 @@
rtx subreg;
if (STACK_REG_P (subreg = SUBREG_REG (*pat)))
  {
-  int regno_off = subreg_regno_offset (REGNO (subreg),
-   GET_MODE (subreg),
-   SUBREG_BYTE (*pat),
-   GET_MODE (*pat));
-  *pat = FP_MODE_REG (REGNO (subreg) + regno_off,
+  *pat = FP_MODE_REG (subreg_regno (subreg),
GET_MODE (subreg));
return pat;


Isn't this wrong? subreg_regno wants to be called with a SUBREG, but 
here we already had subreg = SUBREG_REG (*pat).



@@ -5522,12 +5516,7 @@
  op0 = SUBREG_REG (op0);
  code0 = GET_CODE (op0);
  if (code0 == REG && REGNO (op0) < FIRST_PSEUDO_REGISTER)
-  op0 = gen_rtx_REG (word_mode,
- (REGNO (op0) +
-  subreg_regno_offset (REGNO (SUBREG_REG (orig_op0)),
-   GET_MODE (SUBREG_REG (orig_op0)),
-   SUBREG_BYTE (orig_op0),
-   GET_MODE (orig_op0;
+  op0 = gen_rtx_REG (word_mode, subreg_regno (op0));
}


Same problem as in the reg-stack code? The existing code was using 
orig_op0 to get the subreg, you've changed it to use op0 which is 
already the SUBREG_REG.


With an x86 test you're not exercising reload, and even on other targets 
this is not a frequently used path. I've stopped reviewing here, I think 
this is a good example of the kind of cleanup patch we _shouldn't_ be 
doing. We've proved it's risky, and unless these cleanup patches were a 
preparation for functional changes, we should just leave the code alone IMO.



Bernd


Re: [AArch64][PATCH 1/7] Add support for ARMv8.1 Adv.SIMD,instructions.

2015-10-27 Thread James Greenhalgh
On Fri, Oct 23, 2015 at 01:16:25PM +0100, Matthew Wahab wrote:
> The ARMv8.1 architecture extension adds two Adv.SIMD instructions,
> sqrdmlah and sqrdmlsh. This patch series adds the instructions to the
> AArch64 backend together with the ACLE feature macro and NEON intrinsics
> to make use of them. The instructions are enabled when -march=armv8.1-a
> is selected.
> 
> To support execution tests for the instructions, code is also added to
> the testsuite to check the target capabilities and to specify required
> compiler options.
> 
> This patch adds target feature macros for the instructions. Subsequent
> patches:
> - add the instructions to the aarch64-simd patterns,
> - add GCC builtins to generate the instructions,
> - add the ACLE feature macro __ARM_FEATURE_QRDMX,
> - add support for ARMv8.1-A Adv.SIMD tests to the dejagnu support code,
> - add NEON intrinsics for the basic form of the instructions.
> - add NEON intrinsics for the *_lane forms of the instructions.
> 
> Tested the series for aarch64-none-linux-gnu with native bootstrap and
> make check on an ARMv8 architecture. Also tested aarch64-none-elf with
> cross-compiled check-gcc on an ARMv8.1 emulator.
> 
> Ok for trunk?
> Matthew

OK.

Thanks,
James

> 
> gcc/
> 2015-10-23  Matthew Wahab  
> 
>   * config/aarch64/aarch64.h (AARCH64_ISA_RDMA): New.
>   (TARGET_SIMD_RDMA): New.
> 



Re: [PATCH]Add -fprofile-use option for check_effective_target_freorder.

2015-10-27 Thread Renlin Li



On 26/10/15 13:24, Bernd Schmidt wrote:

On 10/26/2015 02:17 PM, Teresa Johnson wrote:

On Mon, Oct 26, 2015 at 2:00 AM, Renlin Li  wrote:

 * lib/target-supports.exp (check_effective_target_freorder): Add
 -fprofile-use flag.


Hmmm, the testcases themselves which use this predicate only use 
-freorder-and-partition, so maybe this requires more thought.


Yes. In all of the related testcases, only -freorder-and-partition flag 
is provided explicitly.

How about creating a new dg-add-options for freorder?

proc add_options_for_freorder { flags } {
return "$flags -freorder-blocks-and-partition -fprofile-use"
}


proc check_effective_target_freorder {} {
return [check_no_compiler_messages freorder object {
void foo (void) { }
} [add_options_for_freorder ""]]
}

And in all test case which requires freorder support, "{ dg-add-options 
freorder }" is used to add necessary compiler flags?



Regards,
Renlin



Bernd





Re: [PATCH]Add -fprofile-use option for check_effective_target_freorder.

2015-10-27 Thread Bernd Schmidt

On 10/27/2015 11:54 AM, Renlin Li wrote:

Yes. In all of the related testcases, only -freorder-and-partition flag
is provided explicitly.
How about creating a new dg-add-options for freorder?

proc add_options_for_freorder { flags } {
 return "$flags -freorder-blocks-and-partition -fprofile-use"
}


proc check_effective_target_freorder {} {
 return [check_no_compiler_messages freorder object {
 void foo (void) { }
 } [add_options_for_freorder ""]]
}


I don't know that tcl syntax but apparently this is done already for a 
number of other cases. So, probably the right thing to do.


You might want to coordinate with Maxim ostapenko who's currently 
working on another -fprofile-use patch. Hopefully not one that disables 
-freorder-blocks-and-partition if it can't find .gcda files.



Bernd


Re: scheduling conditional branches after stores

2015-10-27 Thread Bernd Schmidt

On 10/09/2015 11:36 PM, Mike Stump wrote:

So, I keep on seeing inaccurate schedule time on the conditional
branches after a store, and tracked it down to this type of solution.
On my machine, I can run these two in the same cycle, but with a
REG_DEP_OUTPUT dependency it was moving the branch to the next cycle.



I kinda wanted a control dependency of some sort, but those seem only
to be used for predicated instructions, which doesn’t apply to the
case at hand.  For the instructions, from my view, there are no data
dependencies at all.  What dependencies exists is the dependency that
one cannot interchange:



   mem[$r1] = 1
   if ($r2) jump L5



This last case being the one that I’m interested in.  So, the
question is, is the below patch the right way to fix this?


I'd recommend trying to fix this with the adjust_cost hook instead.


Bernd


Re: [PATCH] c/67882 - improve -Warray-bounds for invalid offsetof

2015-10-27 Thread Bernd Schmidt

On 10/26/2015 01:27 PM, Richard Biener wrote:

On Mon, Oct 26, 2015 at 1:01 PM, Jakub Jelinek  wrote:

On Mon, Oct 26, 2015 at 12:57:53PM +0100, Bernd Schmidt wrote:

On 10/26/2015 12:47 PM, Jakub Jelinek wrote:


Because the amount of code that uses this (including GCC itself) is just too
huge, so everywhere in the middle-end we also special case last array members of
structs.  While C99+ has flexible array members, e.g. C++ does not, so users
are left with using struct { ... type fld[1]; };


Yes, and that case is documented. However, the issue is arrays declared with
a larger size than 1 or 0 - is there really code using them as flexible
array members?


I believe so, though don't have pointers to that right now.  But vaguely
remember we saw various cases of using 2 or other values too.


Yes, char[4] is quite common (basically making sure there is no appearant
padding behind the array due to alignment - just in case compilers might
be clever because of that).


Ugh, how ugly. Can we at least agree not to allow multi-dimensional 
arrays with a size larger than one to be considered flexible?



Bernd



Re: [AArch64][PATCH 2/7] Add sqrdmah, sqrdmsh instructions.

2015-10-27 Thread James Greenhalgh
On Fri, Oct 23, 2015 at 01:19:20PM +0100, Matthew Wahab wrote:
> The ARMv8.1 architecture extension adds two Adv.SIMD instructions,
> sqrdmlah and sqrdmlsh. This patch adds the instructions to the
> aarch64-simd patterns, making them conditional on the TARGET_SIMD_RDMA
> feature macro introduced in the previous patch.
> 
> The instructions patterns are defined using unspec expressions, so that
> they are only generated through builtins added by this patch series. To
> simplify the definition, iterators SQRDMLAH and rdma_as are added, to
> iterate over the add (sqrdmlah) and subtract (sqrdmlsh) forms of the
> instructions.
> 
> Tested the series for aarch64-none-linux-gnu with native bootstrap and
> make check on an ARMv8 architecture. Also tested aarch64-none-elf with
> cross-compiled check-gcc on an ARMv8.1 emulator.
> 
> Ok for trunk?
> Matthew

OK with the name of the iterator fixed to something more clear as to what
you are iterating over.

> gcc/
> 2015-10-23  Matthew Wahab  
> 
>   * config/aarch64/aarch64-simd.md
>   (aarch64_sqmovun): Fix some white-space.
>   (aarch64_qmovun): Likewise.
>   (aarch64_sqrdmlh): New.
>   (aarch64_sqrdmlh_lane): New.
>   (aarch64_sqrdmlh_laneq): New.
>   * config/aarch64/iterators.md (UNSPEC_SQRDMLAH): New.
>   (UNSPEC_SQRDMLSH): New.
>   (SQRDMLAH): New.
>   (rdma_as): New.
> 

> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index 964f8f1..409ba7b 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -303,6 +303,8 @@
>  UNSPEC_PMULL2   ; Used in aarch64-simd.md.
>  UNSPEC_REV_REGLIST  ; Used in aarch64-simd.md.
>  UNSPEC_VEC_SHR  ; Used in aarch64-simd.md.
> +UNSPEC_SQRDMLAH ; Used in aarch64-simd.md.
> +UNSPEC_SQRDMLSH ; Used in aarch64-simd.md.
>  ])
>  
>  ;; ---
> @@ -932,6 +934,8 @@
> UNSPEC_SQSHRN UNSPEC_UQSHRN
> UNSPEC_SQRSHRN UNSPEC_UQRSHRN])
>  
> +(define_int_iterator SQRDMLAH [UNSPEC_SQRDMLAH UNSPEC_SQRDMLSH])
> +

This name does not make it clear that you will iterate over an "A" and an
"S" form. I'd like to see a clearer naming choice, RDMAS? SQRDMLHADDSUB? etc.

Thanks,
James



Re: [AArch64][PATCH 3/7] Add builtins for ARMv8.1 Adv.SIMD,instructions.

2015-10-27 Thread James Greenhalgh
On Fri, Oct 23, 2015 at 01:20:55PM +0100, Matthew Wahab wrote:
> The ARMv8.1 architecture extension adds two Adv.SIMD instructions,
> sqrdmlah and sqrdmlsh. This patch adds the GCC builtins to generate the new
> instructions which are needed for the NEON intrinsics added later in
> this series.
> 
> Tested the series for aarch64-none-linux-gnu with native bootstrap and
> make check on an ARMv8 architecture. Also tested aarch64-none-elf with
> cross-compiled check-gcc on an ARMv8.1 emulator.
> 
> Ok for trunk?
> Matthew

OK.

Thanks,
James

> 
> gcc/
> 2015-10-23  Matthew Wahab  
> 
>   * config/aarch64/aarch64-simd-builtins.def
>   (sqrdmlah, sqrdmlsh): New.
>   (sqrdmlah_lane, sqrdmlsh_lane): New.
>   (sqrdmlah_laneq, sqrdmlsh_laneq): New.
> 



Re: PING: [PATCH] PR target/67215: -fno-plt needs improvements for x86

2015-10-27 Thread Bernd Schmidt

On 10/19/2015 09:55 PM, H.J. Lu wrote:

 * calls.c (prepare_call_address): Don't handle -fno-plt here.


Is any other target using -fno-plt? If not, and if that's really just a 
revert I'll approve it on the condition that the x86 maintainers are 
happy with the rest of the changes.


Your patch is word-wrapped.


Bernd


Re: Fold comparisons between sqrt and zero

2015-10-27 Thread Richard Biener
On Tue, Oct 27, 2015 at 10:21 AM, Richard Sandiford
 wrote:
> Richard Biener  writes:
>> On Mon, Oct 26, 2015 at 10:36 AM, Richard Sandiford
>>  wrote:
>>> An upcoming patch adds a fold from hypot(x,x) to fabs(x)*sqrt(2).
>>> This is unusual in that it could trigger in the gimplifier but would
>>> require new SSA names to be created.  This patch makes sure that we
>>> don't try to create new SSA names when we're not yet in SSA form.
>>>
>>> Tested on x86_64-linux-gnu, aarch64-linux-gnu and arm-linux-gnueabi.
>>> OK to install?
>>
>> Please use
>>
>>  if (gimple_in_ssa_p (cfun))
>>   res = make_ssa_name (type);
>>  else
>>   res = create_tmp_reg (type);
>>
>> Ok with that change.
>
> Thanks, committed in that form.  I retested the series on top of it and
> the create_tmp_reg causes the later signbit patch (not yet committed)
> to regress on:
>
>   signbit(sqrt(x))
>
> which is always 0 for -ffast-math.  The signbit fold first converts it to:
>
>   sqrt(x) < 0
>
> and whether we realise that this is false depends on a race between two
> folders: the sqrt comparison folder, which wants to convert it to:
>
>   x < 0*0
>
> and the generic tree_expr_nonnegative_p rule for ... < 0, which would
> give the hoped-for 0.
>
> The sqrt code already handles comparisons with negative values specially,
> so this patch simply extends that idea to comparisons with zero.
>
> I don't really like patches like this since they'll probably help no
> real code, but still...
>
> Tested on x86_64-linux-gnu, aarch64-linux-gnu and arm-linux-gnueabi.
> OK to install?

Ok.

Thanks,
Richard.

> Thanks,
> Richard
>
>
> gcc/
> * match.pd: Handle sqrt(x) cmp 0 specially.
>
> gcc/testsuite/
> * gcc.dg/torture/builtin-sqrt-cmp-1.c: New test.
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 26491d2..b8e6b46 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -1973,6 +1973,25 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> { constant_boolean_node (true, type); })
> /* sqrt(x) > y is the same as x >= 0, if y is negative.  */
> (ge @0 { build_real (TREE_TYPE (@0), dconst0); })))
> + (if (real_equal (TREE_REAL_CST_PTR (@1), &dconst0))
> +  (switch
> +   /* sqrt(x) < 0 is always false.  */
> +   (if (cmp == LT_EXPR)
> +   { constant_boolean_node (false, type); })
> +   /* sqrt(x) >= 0 is always true if we don't care about NaNs.  */
> +   (if (cmp == GE_EXPR && !HONOR_NANS (@0))
> +   { constant_boolean_node (true, type); })
> +   /* sqrt(x) <= 0 -> x == 0.  */
> +   (if (cmp == LE_EXPR)
> +   (eq @0 @1))
> +   /* Otherwise sqrt(x) cmp 0 -> x cmp 0.  Here cmp can be >=, >,
> +  == or !=.  In the last case:
> +
> +   (sqrt(x) != 0) == (NaN != 0) == true == (x != 0)
> +
> + if x is negative or NaN.  Due to -funsafe-math-optimizations,
> + the results for other x follow from natural arithmetic.  */
> +   (cmp @0 @1)))
>   (if (cmp == GT_EXPR || cmp == GE_EXPR)
>(with
> {
> diff --git a/gcc/testsuite/gcc.dg/torture/builtin-sqrt-cmp-1.c 
> b/gcc/testsuite/gcc.dg/torture/builtin-sqrt-cmp-1.c
> new file mode 100644
> index 000..3f4a708
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/torture/builtin-sqrt-cmp-1.c
> @@ -0,0 +1,53 @@
> +/* { dg-do link } */
> +/* { dg-options "-ffast-math" } */
> +/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
> +
> +extern double sqrt (double);
> +extern float sqrtf (float);
> +extern long double sqrtl (long double);
> +
> +/* All references to link_error should go away at compile-time.  */
> +extern void link_error (void);
> +
> +#define TEST_ONE(SUFFIX, TYPE) \
> +  void __attribute__ ((noinline, noclone)) \
> +  test##SUFFIX (TYPE f, int *res)  \
> +  {\
> +TYPE sqrt_res = sqrt##SUFFIX (f);  \
> +res[0] = sqrt_res < 0; \
> +if (res[0])\
> +  link_error ();   \
> +res[1] = sqrt_res <= 0;\
> +if (res[1] != (f == 0))\
> +  link_error ();   \
> +res[2] = (sqrt_res == 0);  \
> +if (res[2] != (f == 0))\
> +  link_error ();   \
> +res[3] = (sqrt_res != 0);  \
> +if (res[3] != (f != 0))\
> +  link_error ();   \
> +res[4] = (sqrt_res > 0);   \
> +if (res[4] != (f > 0)) \
> +  link_error ();   \
> +res[5] = (sqrt_res >= 0);  \
> +if (!res[5])   \
> +  link_error ();   \
> +  }
> +
> +volatile float f;
> +volatile double d;
> +volatile long double ld;
> +
> +TEST_ONE (f, float)
> +TEST_ONE (, double)
> +TEST_ONE (l, long double)
> +
> +int
> 

Re: [AArch64][PATCH 4/7] Add ACLE feature macro for ARMv8.1,Adv.SIMD instructions.

2015-10-27 Thread James Greenhalgh
On Fri, Oct 23, 2015 at 01:22:16PM +0100, Matthew Wahab wrote:
> The ARMv8.1 architecture extension adds two Adv.SIMD instructions,
> sqrdmlah and sqrdmlsh. This patch adds the feature macro
> __ARM_FEATURE_QRDMX to indicate the presence of these instructions,
> generating it when the feature is available, as it is when
> -march=armv8.1-a is selected.
> 
> Tested the series for aarch64-none-linux-gnu with native bootstrap and
> make check on an ARMv8 architecture. Also tested aarch64-none-elf with
> cross-compiled check-gcc on an ARMv8.1 emulator.
> 
> Ok for trunk?
> Matthew

I don't see this macro documented in the versions of ACLE available from
the ARM documentation sites, and googling doesn't show anything other
than your patches. You don't explicitly mention anywhere in cover text for
this series where these new features are (or will be?) documented.

Could you please write a more complete description of where these new
macros and intrinsics come from and what they are intended to do? I would
not like to accept them without some confidence that these names have
been finalized, and I am nervous about having the best description of the
behaviour of them be the GCC source code.

Richard, Marcus?

Thanks,
James

> 
> gcc/
> 2015-10-23  Matthew Wahab  
> 
>   * config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Add
>   ARM_FEATURE_QRDMX.
> 


Re: [Patch] Avoid is_simple_use bug in vectorizable_live_operation

2015-10-27 Thread Richard Biener
On Mon, Oct 26, 2015 at 6:15 PM, Alan Hayward  wrote:
>
>
> On 26/10/2015 13:35, "Richard Biener"  wrote:
>
>>On Mon, Oct 26, 2015 at 1:33 PM, Alan Hayward 
>>wrote:
>>> There is a potential bug in vectorizable_live_operation.
>>>
>>> Consider the case where the first op for stmt is valid, but the second
>>>is
>>> null.
>>> The first time through the for () loop, it will call out to
>>> vect_is_simple_use () which will set dt.
>>> The second time, because op is null, vect_is_simple_use () will not be
>>> called.
>>> However, dt is still set to a valid value, therefore the loop will
>>> continue.
>>>
>>> Note this is different from the case where the first op is null, which
>>> will cause the loop not call vect_is_simple_use () and then return
>>>false.
>>>
>>> It is possible that this was intentional, however that is not clear from
>>> the code.
>>>
>>> The fix is to simply ensure dt is initialized to a default value on each
>>> iteration.
>>
>>I think the patch is a strict improvement, thus OK.  Still a NULL operand
>>is not possible in GIMPLE so the op && check is not necessary.  The way
>>it iterates over all stmt uses is a bit scary anyway.  As it is ok with
>>all invariants it should probably simply do sth like
>>
>>   FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
>> if (!vect_is_simple_use (op, ))
>>
>>and be done with that.  Unvisited uses can only be constants (ok).
>>
>>Care to rework the funtion like that if you are here?
>>
>
> Ok, I’ve updated as requested.

Ok.  Please remove

  code = gimple_assign_rhs_code (stmt);
  op_type = TREE_CODE_LENGTH (code);
  rhs_class = get_gimple_rhs_class (code);
  gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
  gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);

and associated variables as I belive otherwise a --disable-checking build
will fail with set-but-not-used warnings.  And the asserts are quite pointless
given the now sanitized loop.

Thanks,
Richard.

>
> Cheers,
> Alan.
>


Re: PING: [PATCH] PR target/67215: -fno-plt needs improvements for x86

2015-10-27 Thread H.J. Lu
On Tue, Oct 27, 2015 at 4:20 AM, Bernd Schmidt  wrote:
> On 10/19/2015 09:55 PM, H.J. Lu wrote:
>>
>>  * calls.c (prepare_call_address): Don't handle -fno-plt here.
>
>
> Is any other target using -fno-plt? If not, and if that's really just a

aarch64 is the only target which checks -fno-plt from commit log below.
I CCed the code author.  aarch64 may suffer from the same issue.

> revert I'll approve it on the condition that the x86 maintainers are happy
> with the rest of the changes.

Uros, can you take a look?

> Your patch is word-wrapped.

Sorry for that.  Here is the link for the patch:

https://gcc.gnu.org/git/?p=gcc.git;a=patch;h=a116a4af073bb99c3117dae38f02d528815bc431

Thanks.

-- 
H.J.
---
Author: jiwang 
Date:   Thu Aug 6 16:02:16 2015 +

[AArch64] Tighten direct call pattern for sibcall to repair -fno-plt

2015-08-06  Jiong Wang  

gcc/
  * config/aarch64/constraints.md (Usf): Add the test of
  aarch64_is_noplt_call_p.

gcc/testsuite/
  * gcc.target/aarch64/noplt_3.c: New testcase.



git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@226682
138bc75d-0d04-0410-961f-82ee72b054a4

commit 2bcb7473b37f9aa76e530f0a2007893489f61586
Author: jiwang 
Date:   Thu Aug 6 15:57:36 2015 +

[AArch64] Tighten direct call pattern to repair -fno-plt

2015-08-06  Jiong Wang  

gcc/
  * config/aarch64/aarch64-protos.h (aarch64_is_noplt_call_p): New
declaration.
  * config/aarch64/aarch64.c (aarch64_is_noplt_call_p): New function.
  * config/aarch64/aarch64.md (call_value_symbol): Check noplt scenarios.
  (call_symbol): Likewise.

gcc/testsuite/
  * gcc.target/aarch64/noplt_1.c: New testcase.
  * gcc.target/aarch64/noplt_2.c: Likewise.



git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@226681
138bc75d-0d04-0410-961f-82ee72b054a4


Re: [PATCH]Add -fprofile-use option for check_effective_target_freorder.

2015-10-27 Thread Maxim Ostapenko

On 27/10/15 14:06, Bernd Schmidt wrote:

On 10/27/2015 11:54 AM, Renlin Li wrote:

Yes. In all of the related testcases, only -freorder-and-partition flag
is provided explicitly.
How about creating a new dg-add-options for freorder?

proc add_options_for_freorder { flags } {
 return "$flags -freorder-blocks-and-partition -fprofile-use"
}


proc check_effective_target_freorder {} {
 return [check_no_compiler_messages freorder object {
 void foo (void) { }
 } [add_options_for_freorder ""]]
}


I don't know that tcl syntax but apparently this is done already for a 
number of other cases. So, probably the right thing to do.


You might want to coordinate with Maxim ostapenko who's currently 
working on another -fprofile-use patch. Hopefully not one that 
disables -freorder-blocks-and-partition if it can't find .gcda files.


AFAU, adding -fprofile-use would lead to enabling a bunch of PGO-related 
optimizations wherever you have .gcda file or not. So, you can end up 
with a completely different optimization pipeline for your tests if you 
enable -fprofile-use for them. Is it desirable?


Anyway, disabling any compile options provided by user explicitly sounds 
like a bad idea for me, so disabling -freorder-blocks-and-partition if 
it can't find .gcda file seems to be not acceptable.


-Maxim



Bernd





Re: [PATCH]Add -fprofile-use option for check_effective_target_freorder.

2015-10-27 Thread Bernd Schmidt

On 10/27/2015 12:38 PM, Maxim Ostapenko wrote:


Anyway, disabling any compile options provided by user explicitly sounds
like a bad idea for me, so disabling -freorder-blocks-and-partition if
it can't find .gcda file seems to be not acceptable.


The current situation is that -fr-b-a-p is disabled if -fprofile-use is 
not also present, because it would have no effect in this case. We're 
looking at fixing -fr-b-a-p testcases to pass -fprofile-use; ideally 
that will stay working with your patch (I haven't checked).



Bernd


[Ada] Pragma SPARK_Mode and expression functions

2015-10-27 Thread Arnaud Charlet
This patch ensures that an internally generated subprogram body that completes
an expression function inherits the SPARK_Mode from the expression function
spec.


-- Source --


--  expr_funcs.ads

package Expr_Funcs with SPARK_Mode is
   function F1 return Boolean is (True);  --  F1 spec has mode ON
   function F2 return Boolean;--  F2 spec has mode ON
   function F3 return Boolean;--  F3 spec has mode ON

private
   pragma SPARK_Mode (Off);

-- function F1 return Boolean is  --  F1 body has mode ON
-- begin
--return True;
-- end F1;

   function F2 return Boolean is (True);  --  F2 body has mode OFF
   function F3 return Boolean is (True)   --  F3 body attempts mdoe ON
 with SPARK_Mode; -- Error
end Expr_Funcs;


-- Compilation and output --


$ gcc -c expr_funcs.ads
expr_funcs.ads:16:11: cannot change SPARK_Mode from Off to On
expr_funcs.ads:16:11: SPARK_Mode was set to Off at line 7

Tested on x86_64-pc-linux-gnu, committed on trunk

2015-10-27  Hristian Kirtchev  

* inline.adb (Is_Expression_Function): Removed.
* sem_ch6.adb (Analyze_Subprogram_Body_Helper): An internally
generated subprogram body that completes an expression function
inherits the SPARK_Mode from the spec.
* sem_res.adb (Resolve_Call): Update all calls to
Is_Expression_Function.
* sem_util.ads, sem_util.adb (Is_Expression_Function): Reimplemented.
(Is_Expression_Function_Or_Completion): New routine.

Index: inline.adb
===
--- inline.adb  (revision 229413)
+++ inline.adb  (working copy)
@@ -1357,10 +1357,6 @@
   --  Returns True if subprogram Id is defined in the visible part of a
   --  package specification.
 
-  function Is_Expression_Function (Id : Entity_Id) return Boolean;
-  --  Returns True if subprogram Id was defined originally as an expression
-  --  function.
-
   ---
   -- Has_Formal_With_Discriminant_Dependent_Fields --
   ---
@@ -1472,20 +1468,6 @@
and then List_Containing (Decl) = Visible_Declarations (P);
   end In_Package_Visible_Spec;
 
-  
-  -- Is_Expression_Function --
-  
-
-  function Is_Expression_Function (Id : Entity_Id) return Boolean is
- Decl : Node_Id := Parent (Parent (Id));
-  begin
- if Nkind (Parent (Id)) = N_Defining_Program_Unit_Name then
-Decl := Parent (Decl);
- end if;
-
- return Nkind (Original_Node (Decl)) = N_Expression_Function;
-  end Is_Expression_Function;
-
   
   -- Is_Unit_Subprogram --
   
Index: sem_util.adb
===
--- sem_util.adb(revision 229413)
+++ sem_util.adb(working copy)
@@ -5081,7 +5081,6 @@
   (Is_Concurrent_Type (Scope (Discriminal_Link (E)))
 or else
   Is_Concurrent_Record_Type (Scope (Discriminal_Link (E);
-
end Denotes_Discriminant;
 
-
@@ -11677,26 +11676,46 @@

 
function Is_Expression_Function (Subp : Entity_Id) return Boolean is
-  Decl : Node_Id;
-
begin
-  if Ekind (Subp) /= E_Function then
+  if Ekind_In (Subp, E_Function, E_Subprogram_Body) then
+ return
+   Nkind (Original_Node (Unit_Declaration_Node (Subp))) =
+ N_Expression_Function;
+  else
  return False;
+  end if;
+   end Is_Expression_Function;
 
+   --
+   -- Is_Expression_Function_Or_Completion --
+   --
+
+   function Is_Expression_Function_Or_Completion
+ (Subp : Entity_Id) return Boolean
+   is
+  Subp_Decl : Node_Id;
+
+   begin
+  if Ekind (Subp) = E_Function then
+ Subp_Decl := Unit_Declaration_Node (Subp);
+
+ --  The function declaration is either an expression function or is
+ --  completed by an expression function body.
+
+ return
+   Is_Expression_Function (Subp)
+ or else (Nkind (Subp_Decl) = N_Subprogram_Declaration
+   and then Present (Corresponding_Body (Subp_Decl))
+   and then Is_Expression_Function
+  (Corresponding_Body (Subp_Decl)));
+
+  elsif Ekind (Subp) = E_Subprogram_Body then
+ return Is_Expression_Function (Subp);
+
   else
- Decl := Unit_Declaration_Node (Subp);
- return Nkind (Decl) = N_Subprogram_Declaration
-   and then
- (Nkind (Original_Node (Decl)

[Ada] Delete response file even when link failed

2015-10-27 Thread Arnaud Charlet
When a response file was created and the link failed, the response file
was not deleted. It is deleted now.

Tested on x86_64-pc-linux-gnu, committed on trunk

2015-10-27  Vincent Celier  

* gnatlink.adb: Always delete the response file, even when the
invocation of gcc to link failed.

Index: gnatlink.adb
===
--- gnatlink.adb(revision 229413)
+++ gnatlink.adb(working copy)
@@ -1859,6 +1859,10 @@
--  been compiled.
 
if Opt.CodePeer_Mode then
+  if Tname_FD /= Invalid_FD then
+ Delete (Tname);
+  end if;
+
   return;
end if;
 
@@ -2052,16 +2056,14 @@
 
  System.OS_Lib.Spawn (Linker_Path.all, Args, Success);
 
- if Success then
+ --  Delete the temporary file used in conjunction with linking if one
+ --  was created. See Process_Bind_File for details.
 
---  Delete the temporary file used in conjunction with linking
---  if one was created. See Process_Bind_File for details.
+ if Tname_FD /= Invalid_FD then
+Delete (Tname);
+ end if;
 
-if Tname_FD /= Invalid_FD then
-   Delete (Tname);
-end if;
-
- else
+ if not Success then
 Error_Msg ("error when calling " & Linker_Path.all);
 Exit_Program (E_Fatal);
  end if;


Re: [PATCH, GCC 5 branch] Fix compile time regression caused by fix to PR64111

2015-10-27 Thread Richard Biener
On Mon, Oct 26, 2015 at 6:14 PM, Caroline Tice  wrote:
> Here is my promised backport to the GCC 5 branch, for the patch below
> that went into ToT last week.  As with the previous patch, I've
> verified that it fixes the problem, bootstraps and has no new
> regression test failures.  Is this ok to commit to the gcc-5-branch?

Ok.

Thanks,
Richard.

> -- Caroline Tice
> cmt...@google.com
>
>
> On Fri, Oct 23, 2015 at 3:22 PM, Caroline Tice  wrote:
>> This patch fixes a compile-time regression that was originally
>> introduced by the fix
>> for PR64111, in GCC 4.9.3.One of our user's encountered this problem 
>> with a
>> particular file, where the compile time (on arm) went from 20 seconds
>> to 150 seconds.
>>
>> The fix in this patch was suggested by Richard Biener, who wrote the
>> original fix for
>> PR64111.  I have verified that this patch fixes the compile time
>> regression; I have bootstrapped
>> the compiler with this patch; and I have run the regression testsuite
>> (no regressions).
>> Is this ok to commit to ToT?   (I am also working on backports for
>> gcc-5_branch and gcc-4_9-branch).
>>
>> -- Caroline Tice
>> cmt...@google.com
>>
>  gcc/ChangeLog:
>
>  2015-10-26  Caroline Tice  
>
>  (from Richard Biener)
>  * tree.c (int_cst_hasher::hash):  Replace XOR with more efficient
>  call to iterative_hash_host_wide_int.


Re: [PATCH, GCC 4.9 branch] Fix compile time regression caused by fix to PR64111

2015-10-27 Thread Richard Biener
On Tue, Oct 27, 2015 at 4:36 AM, Caroline Tice  wrote:
> Here is my promised backport to the GCC 4.9 branch, for the patch below
> that went into ToT last week.  As with the previous patch, I've
> verified that it fixes the problem, bootstraps and has no new
> regression test failures.  Is this ok to commit to the gcc-4_9-branch?

Ok.

Thanks,
Richard.

> -- Caroline Tice
> cmt...@google.com
>
> gcc/ChangeLog:
>
>  2015-10-26  Caroline Tice  
>
> (from Richard Biener)
>  * tree.c (int_cst_hash_hash):  Replace XORs with more efficient
>  calls to iterative_hash_host_wide_int.


[Ada] Legality checks on initialization of limited objects

2015-10-27 Thread Arnaud Charlet
This patch corrects an omission on the legality check for allocators with
a qualified expression when the expression is of a limited type. The check
must be performed after the expression is fully resolved, to handle properly
complex overloading cases such as indexings of parameterless functions that
return arrays.

ACATS test B750a04.

Tested on x86_64-pc-linux-gnu, committed on trunk

2015-10-27  Ed Schonberg  

* sem_ch4.adb (Analyze_Allocator): Do not perform legality check
on allocators for limited objects in a qualified expression,
because expression has not been resolved.
* sem_res.adb (Resolve_Allocator): Perform check on legality of
limited objects after resolution.  Add sem_ch3.adb to context.

Index: sem_res.adb
===
--- sem_res.adb (revision 229421)
+++ sem_res.adb (working copy)
@@ -57,6 +57,7 @@
 with Sem_Attr; use Sem_Attr;
 with Sem_Cat;  use Sem_Cat;
 with Sem_Ch4;  use Sem_Ch4;
+with Sem_Ch3;  use Sem_Ch3;
 with Sem_Ch6;  use Sem_Ch6;
 with Sem_Ch8;  use Sem_Ch8;
 with Sem_Ch13; use Sem_Ch13;
@@ -4680,6 +4681,22 @@
  Check_Non_Static_Context (Expression (E));
  Check_Unset_Reference (Expression (E));
 
+ --  Allocators generated by the build-in-place expansion mechanism
+ --  are explicitly marked as coming from source but do not need to be
+ --  checked for limited initialization. To exclude this case, ensure
+ --  that the parent of the allocator is a source node.
+
+ if Is_Limited_Type (Etype (E))
+   and then Comes_From_Source (N)
+   and then Comes_From_Source (Parent (N))
+   and then not In_Instance_Body
+ then
+if not OK_For_Limited_Init (Etype (E), Expression (E)) then
+   Error_Msg_N ("initialization not allowed for limited types", N);
+   Explain_Limited_Type (Etype (E), N);
+end if;
+ end if;
+
  --  A qualified expression requires an exact match of the type.
  --  Class-wide matching is not allowed.
 
Index: sem_ch4.adb
===
--- sem_ch4.adb (revision 229413)
+++ sem_ch4.adb (working copy)
@@ -549,22 +549,6 @@
  Type_Id := Etype (E);
  Set_Directly_Designated_Type (Acc_Type, Type_Id);
 
- --  Allocators generated by the build-in-place expansion mechanism
- --  are explicitly marked as coming from source but do not need to be
- --  checked for limited initialization. To exclude this case, ensure
- --  that the parent of the allocator is a source node.
-
- if Is_Limited_Type (Type_Id)
-   and then Comes_From_Source (N)
-   and then Comes_From_Source (Parent (N))
-   and then not In_Instance_Body
- then
-if not OK_For_Limited_Init (Type_Id, Expression (E)) then
-   Error_Msg_N ("initialization not allowed for limited types", N);
-   Explain_Limited_Type (Type_Id, N);
-end if;
- end if;
-
  --  A qualified expression requires an exact match of the type,
  --  class-wide matching is not allowed.
 


[Ada] Spurious error on legal use of abstract state constituent

2015-10-27 Thread Arnaud Charlet
This patch modifies the analysis of pragma Refined_Global to treat objects and
states as constituents only when their encapsulating state appears in pragma
Global.


-- Source --


--  pack.ads

package Pack
  with Abstract_State => (State1, State2),
   Initializes=> (Var, State1, State2)
is
   Var : Integer := 0;

   procedure Initialize_State
 with Global  => (Output => State1),
  Depends => (State1 => null);
end Pack;

--  pack.adb

package body Pack
  with Refined_State => (State1 => (A, B),
 State2 => (Inner.Inner_Var, Inner.Inner_State))
is
   A, B : Integer;

   package Inner
 with Abstract_State => Inner_State,
  Initializes=> (Inner_State,
 Inner_Var => Var)
   is
  Inner_Var : Integer;

  procedure Initialize_Inner
with Global  => (Output => (Inner_State, Inner_Var),
 Input  => Var),
 Depends => (Inner_State => null,
 Inner_Var   => Var);
   end Inner;

   procedure Initialize_State is separate
 with Refined_Global  => (Output => (A, B)),
  Refined_Depends => ((A, B) => null);

   procedure Double_B is separate
 with Global  => (In_Out => B),
  Depends => (B => B);

   package body Inner is separate;
begin
   Initialize_State;
   Double_B;
end Pack;

--  pack-double_b.adb

separate (Pack)

procedure Double_B is
begin
   B := B * 2;
end Double_B;

--  pack-initialize_state.adb

separate (Pack)

procedure Initialize_State is
begin
   A := 0;
   B := 0;
end Initialize_State;

--  pack-inner.adb

separate (Pack)

package body Inner
  with Refined_State => (Inner_State => Inner_Body_Var)
is
   Inner_Body_Var : Integer;

   procedure Initialize_Inner
 with Refined_Global  => (Output => (Inner_Body_Var, Inner_Var),
  Input  => Var),
  Refined_Depends => (Inner_Body_Var => null,
  Inner_Var  => Var)
   is
   begin
  Inner_Body_Var := 0;
  Inner_Var  := Var;
   end Initialize_Inner;
begin
   Initialize_Inner;
end Inner;

-
-- Compilation --
-

$ gcc -c pack.adb

Tested on x86_64-pc-linux-gnu, committed on trunk

2015-10-27  Hristian Kirtchev  

* sem_prag.adb (Analyze_Refined_Global_In_Decl_Part): Add variable
States.
(Check_Refined_Global_Item): An object or state acts as a
constituent only when the corresponding encapsulating state
appears in pragma Global.
(Collect_Global_Item): Add a state with non-null visible refinement to
list States.

Index: sem_prag.adb
===
--- sem_prag.adb(revision 229414)
+++ sem_prag.adb(working copy)
@@ -527,7 +527,7 @@
   --E_Constant - "constant"
   --E_Discriminant - "discriminant"
   --E_Generic_In_Out_Parameter - "generic parameter"
-  --E_Generic_Out_Parameter- "generic parameter"
+  --E_Generic_In_Parameter - "generic parameter"
   --E_In_Parameter - "parameter"
   --E_In_Out_Parameter - "parameter"
   --E_Loop_Parameter   - "loop parameter"
@@ -24057,6 +24057,9 @@
   Spec_Id : Entity_Id;
   --  The entity of the subprogram subject to pragma Refined_Global
 
+  States : Elist_Id := No_Elist;
+  --  A list of all states with visible refinement found in pragma Global
+
   procedure Check_In_Out_States;
   --  Determine whether the corresponding Global pragma mentions In_Out
   --  states with visible refinement and if so, ensure that one of the
@@ -24566,11 +24569,14 @@
  begin
 --  When the state or object acts as a constituent of another
 --  state with a visible refinement, collect it for the state
---  completeness checks performed later on.
+--  completeness checks performed later on. Note that the item
+--  acts as a constituent only when the encapsulating state is
+--  present in pragma Global.
 
 if Ekind_In (Item_Id, E_Abstract_State, E_Constant, E_Variable)
  and then Present (Encapsulating_State (Item_Id))
  and then Has_Visible_Refinement (Encapsulating_State (Item_Id))
+ and then Contains (States, Encapsulating_State (Item_Id))
 then
if Global_Mode = Name_Input then
   Append_New_Elmt (Item_Id, In_Constits);
@@ -24715,6 +24721,8 @@
   Has_Null_State := True;
 
elsif Has_Non_Null_Refinement (Item_Id) then
+  Append_New_Elmt (Item_Id, States);
+
   if Item_Mode = Name_Input then
  Has_In_State := True;
   elsif Item_Mode = Name_In_Out then


Re: [PATCH] [AArch64] Distinct costs for sign and zero extension

2015-10-27 Thread James Greenhalgh
On Tue, Oct 20, 2015 at 07:52:45AM +0800, Andrew Pinski wrote:
> On Tue, Oct 20, 2015 at 7:10 AM, Evandro Menezes  
> wrote:
> > Some micro-architectures may favor one of sign or zero extension over the
> > other in the base plus extended register offset addressing mode.
> 
> Yes I was going to create the same patch as ThunderX is one of those
> micro-architectures.

OK (Though the patch would have looked better had all the in-tree examples
not be no-op splits!).

I've committed this as r229431 on your behalf. Please provide full ChangeLog
entries with the patch submission, I derived the Name/Email fields and
committed this:

2015-10-27  Evandro Menezes  

* config/aarch64/aarch64-protos.h (cpu_addrcost_table): Split member
for register extension into sign and zero register extension.
* config/aarch64/aarch64.c (generic_addrcost_table): Infer values
for sign and zero register extension.
(cortexa57_addrcost_table): Likewise.
(xgene1_addrcost_table): Likewise.

Thanks,
James

> >
> > This patch separates the member "register_extend" of the structure
> > "cpu_addrcost_table" into two, one for sign and the other zero extension.
> >
> > Please, commit if it's alright.
> >
> > Thank you,
> >
> > --
> > Evandro Menezes
> >
> 


Re: [PATCH] Adjust some patterns wrt :c

2015-10-27 Thread Richard Biener
On Mon, 26 Oct 2015, Marc Glisse wrote:

> On Mon, 26 Oct 2015, Richard Biener wrote:
> 
> > @@ -435,7 +435,7 @@ (define_operator_list RINT BUILT_IN_RINT
> > 
> > /* Fold (A & ~B) - (A & B) into (A ^ B) - B.  */
> > (simplify
> > - (minus (bit_and:cs @0 (bit_not @1)) (bit_and:s @0 @1))
> > + (minus (bit_and:cs @0 (bit_not @1)) (bit_and:cs @0 @1))
> >   (minus (bit_xor @0 @1) @1))
> 
> Sorry, I should have listed them all, but the same applies to
> /* Fold (A & B) - (A & ~B) into B - (A ^ B).  */
> a few lines below.

Heh.  I wonder if we can teach genmatch to auto-annotate commutative
operands (detecting the cases where it doesn't get us anything but
redundant patterns).  Or implement a warning that catches them at
least.

Anyway, fixed in my local tree, will go out with my next match.pd
modification.

Richard.


Re: [PATCH][ARM] PR target/67929 Tighten vfp3_const_double_for_bits checks

2015-10-27 Thread Ramana Radhakrishnan
On Wed, Oct 14, 2015 at 1:23 PM, Kyrill Tkachov  wrote:
> Hi all,
>
> This patch fixes the referenced PR by rewriting the
> vfp3_const_double_for_bits function in arm.c
> The function is supposed to accept positive CONST_DOUBLE rtxes whose value
> is an exact power of 2
> and whose log2 is between 1 and 32. That is values like 2.0, 4.0, 8.9, 16.0
> etc...
>
> The current implementation seems to have been written under the assumption
> that exact_real_truncate returns
> false if the input value is not an exact integer, whereas in fact
> exact_real_truncate returns false if the
> truncation operation was not exact, which are different things. This would
> lead the function to accept any
> CONST_DOUBLE that can truncate to a power of 2, such as 4.9, 16.2 etc.
>
> In any case, I've rewritten this function and used the real_isinteger
> predicate to check if the real value
> is an exact integer.
>
> The testcase demonstrates the kind of wrong code that this patch addresses.
>
> This bug appears on GCC 5 and 4.9 as well, but due to the recent
> introduction of CONST_DOUBLE_REAL_VALUE
> this patch doesn't apply on those branches. I will soon post the
> backportable variant.
>
> Bootstrapped and tested on arm-none-linux-gnueabihf.
>
> Ok for trunk?


Thanks, this is OK for trunk and all release branches.

Ramana

>
> Thanks,
> Kyrill
>
> 2015-10-12  Kyrylo Tkachov  
>
> PR target/67929
> * config/arm/arm.c (vfp3_const_double_for_bits): Rewrite.
> * config/arm/constraints.md (Dp): Update callsite.
> * config/arm/predicates.md (const_double_vcvt_power_of_two): Likewise.
>
> 2015-10-12  Kyrylo Tkachov  
>
> PR target/67929
> * gcc.target/arm/pr67929_1.c: New test.


Re: [PATCH, PR67742] Handle restrict struct fields recursively

2015-10-27 Thread Tom de Vries

On 27/10/15 08:25, Tom de Vries wrote:

@@ -5968,7 +5990,16 @@ intra_create_variable_infos (struct function *fn)
  for (; p; p = vi_next (p))
{
  if (p->only_restrict_pointers)
-   make_constraint_from_global_restrict (p, "PARM_RESTRICT");
+   {
+ varinfo_t vi = lookup_restrict_pointed_var (p);
+ if (vi != NULL)
+   {
+ make_constraint_from (p, vi->id);
+ make_restrict_var_constraints (vi);
+   }
+ else
+   make_constraint_from_global_restrict (p, "PARM_RESTRICT");
+   }
  else if (p->may_have_pointers)
make_constraint_from (p, nonlocal_id);
}
-- 1.9.1


Thinking it over a bit more, I realized the constraint handling started 
to be messy. I've reworked the patch series to simplify that first.


 1  Simplify constraint handling
 2  Rename make_restrict_var_constraints to make_param_constraints
 3  Add recursion to make_param_constraints
 4  Add handle_param parameter to create_variable_info_for_1
 5  Handle recursive restrict pointer in create_variable_info_for_1
 6  Handle restrict struct fields recursively

Currently doing bootstrap and regtest on x86_64.

I'll repost the patch series in reply to this message.

Thanks,
- Tom


Re: [PR 67794] Also remap SSA_NAMEs defined in ASMs in IPA-SRA

2015-10-27 Thread Martin Jambor
On Tue, Oct 27, 2015 at 09:56:48AM +0100, Christophe Lyon wrote:
> Hi Martin,
> 
> After your backport in the gcc-5 branch, I see build failures:
> /tmp/2849532_27.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/tree-sra.c:
> In function ‘tree_node* replace_removed_params_ssa_names(tree_node*,
> gimple_statement_base**, ipa_parm_adjustment_vec)’:
> /tmp/2849532_27.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/tree-sra.c:4609:
> error: cannot convert ‘gimple_statement_base**’ to
> ‘gimple_statement_base*’ for argument ‘2’ to ‘tree_node*
> make_ssa_name(tree_node*, gimple_statement_base*)’
> /tmp/2849532_27.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/tree-sra.c:
> In function ‘bool
> ipa_sra_modify_function_body(ipa_parm_adjustment_vec)’:
> /tmp/2849532_27.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/tree-sra.c:4703:
> error: cannot convert ‘gphi*’ to ‘gimple_statement_base**’ for
> argument ‘2’ to ‘tree_node*
> replace_removed_params_ssa_names(tree_node*, gimple_statement_base**,
> ipa_parm_adjustment_vec)’
> /tmp/2849532_27.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/tree-sra.c:4772:
> error: cannot convert ‘gimple_statement_base*’ to
> ‘gimple_statement_base**’ for argument ‘2’ to ‘tree_node*
> replace_removed_params_ssa_names(tree_node*, gimple_statement_base**,
> ipa_parm_adjustment_vec)’
> make[2]: *** [tree-sra.o] Error 1
> 
> I see this on aarch64* and arm* targets.
> 
> Can you fix this?

Oops, I must have mistakenly committed the trunk version to the
branch.  I have just fixed the problem by committing the following
(after checking that tree-sra.c now matches the one I have tested on
the branch).

Sorry and thanks for reporting,

Martin


2015-10-27  Martin Jambor  

* tree-sra.c (replace_removed_params_ssa_names): Change type of
parameter stmt to gimple.

Index: gcc/tree-sra.c
===
--- gcc/tree-sra.c  (revision 229434)
+++ gcc/tree-sra.c  (working copy)
@@ -4587,7 +4587,7 @@ get_adjustment_for_base (ipa_parm_adjust
ADJUSTMENTS is a pointer to a vector of IPA-SRA adjustments.  */
 
 static tree
-replace_removed_params_ssa_names (tree old_name, gimple *stmt,
+replace_removed_params_ssa_names (tree old_name, gimple stmt,
  ipa_parm_adjustment_vec adjustments)
 {
   struct ipa_parm_adjustment *adj;



Re: [PATCH] Pass manager: add support for termination of pass list

2015-10-27 Thread Martin Liška
On 10/26/2015 02:48 PM, Richard Biener wrote:
> On Thu, Oct 22, 2015 at 1:02 PM, Martin Liška  wrote:
>> On 10/21/2015 04:06 PM, Richard Biener wrote:
>>> On Wed, Oct 21, 2015 at 1:24 PM, Martin Liška  wrote:
 On 10/21/2015 11:59 AM, Richard Biener wrote:
> On Wed, Oct 21, 2015 at 11:19 AM, Martin Liška  wrote:
>> On 10/20/2015 03:39 PM, Richard Biener wrote:
>>> On Tue, Oct 20, 2015 at 3:00 PM, Martin Liška  wrote:
 Hello.

 As part of upcoming merge of HSA branch, we would like to have 
 possibility to terminate
 pass manager after execution of the HSA generation pass. The HSA 
 back-end is implemented
 as a tree pass that directly emits HSAIL from gimple tree 
 representation. The pass operates
 on clones created by HSA IPA pass and the pass manager should stop 
 execution of further
 RTL passes.

 Suggested patch survives bootstrap and regression tests on 
 x86_64-linux-pc.

 What do you think about it?
>>>
>>> Are you sure it works this way?
>>>
>>> Btw, you will miss executing of all the cleanup passes that will
>>> eventually free memory
>>> associated with the function.  So I'd rather support a
>>> TODO_discard_function which
>>> should basically release the body from the cgraph.
>>
>> Hi.
>>
>> Agree with you that I should execute all TODOs, which can be easily done.
>> However, if I just try to introduce the suggested TODO and handle it 
>> properly
>> by calling cgraph_node::release_body, then for instance fn->gimple_df, 
>> fn->cfg are
>> released and I hit ICEs on many places.
>>
>> Stopping the pass manager looks necessary, or do I miss something?
>
> "Stopping the pass manager" is necessary after TODO_discard_function, yes.
> But that may be simply done via a has_body () check then?

 Thanks, there's second version of the patch. I'm going to start regression 
 tests.
>>>
>>> As release_body () will free cfun you should pop_cfun () before
>>> calling it (and thus
>>
>> Well, release_function_body calls both push & pop, so I think calling pop
>> before cgraph_node::release_body is not necessary.
> 
> (ugh).
> 
>> If tried to call pop_cfun before cgraph_node::release_body, I have cfun still
>> pointing to the same (function *) (which is gcc_freed, but cfun != NULL).
> 
> Hmm, I meant to call pop_cfun then after it (unless you want to fix the above,
> none of the freeing functions should techincally need 'cfun', just add
> 'fn' parameters ...).
> 
> I expected pop_cfun to eventually set cfun to NULL if it popped the
> "last" cfun.  Why
> doesn't it do that?
> 
>>> drop its modification).  Also TODO_discard_functiuon should be only set for
>>> local passes thus you should probably add a gcc_assert (cfun).
>>> I'd move its handling earlier, definitely before the ggc_collect, eventually
>>> before the pass_fini_dump_file () (do we want a last dump of the
>>> function or not?).
>>
>> Fully agree, moved here.
>>
>>>
>>> @@ -2397,6 +2410,10 @@ execute_pass_list_1 (opt_pass *pass)
>>>  {
>>>gcc_assert (pass->type == GIMPLE_PASS
>>>   || pass->type == RTL_PASS);
>>> +
>>> +
>>> +  if (!gimple_has_body_p (current_function_decl))
>>> +   return;
>>>
>>> too much vertical space.  With popping cfun before releasing the body the 
>>> check
>>> might just become if (!cfun) and
>>
>> As mentioned above, as release body is symmetric (calling push & pop), the 
>> suggested
>> guard will not work.
> 
> I suggest to fix it.  If it calls push/pop it should leave with the
> original cfun, popping again
> should make it NULL?
> 
>>>
>>> @@ -2409,7 +2426,7 @@ execute_pass_list (function *fn, opt_pass *pass)
>>>  {
>>>push_cfun (fn);
>>>execute_pass_list_1 (pass);
>>> -  if (fn->cfg)
>>> +  if (gimple_has_body_p (current_function_decl) && fn->cfg)
>>>  {
>>>free_dominance_info (CDI_DOMINATORS);
>>>free_dominance_info (CDI_POST_DOMINATORS);
>>>
>>> here you'd need to guard the pop_cfun call on cfun != NULL.  IMHO it's 
>>> better
>>> to not let cfun point to deallocated data.
>>
>> As I've read the code, since we gcc_free 'struct function', one can just 
>> rely on
>> gimple_has_body_p (current_function_decl) as it correctly realizes that
>> DECL_STRUCT_FUNCTION (current_function_decl) == NULL.
> 
> I'm sure that with some GC checking ggc_free makes them #deadbeef or so:
> 
> void
> ggc_free (void *p)
> {
> ...
> #ifdef ENABLE_GC_CHECKING
>   /* Poison the data, to indicate the data is garbage.  */
>   VALGRIND_DISCARD (VALGRIND_MAKE_MEM_UNDEFINED (p, size));
>   memset (p, 0xa5, size);
> #endif
> 
> so I don't think that's a good thing to rely on ;)
> 
> Richard.

Hi Richi, fully agree that relying of 0xdeadbeaf is not good.
I'm sending quite simple patch v4 where I enable popping up
the stack to eventually set cfun 

Re: [PATCH] tree-scalar-evolution.c: Handle LSHIFT by constant

2015-10-27 Thread Alan Lawrence
--in-reply-to 


On 26/10/15 08:58, Richard Biener wrote:
>
> On Fri, Oct 23, 2015 at 5:15 PM, Alan Lawrence  wrote:
>> +  chrec2 = fold_build2 (LSHIFT_EXPR, TREE_TYPE (rhs1),
>> +   build_int_cst (TREE_TYPE (rhs1), 1),
>
> 'type' instead of TREE_TYPE (rhs1)

I presume you mean the first of the two (allowing removal of the chrec_convert),
and that I keep the second TREE_TYPE (rhs1), consistent with your previous
observation that I should do the multiply in the type of rhs1. (This appears
correct looking at e.g. gcc.target/i386/avx2-vpsllwi-2.c)

Hence, I've committed the attached as r229437.

Thanks, Alan
---
 gcc/testsuite/gcc.dg/vect/vect-strided-shift-1.c | 33 
 gcc/tree-scalar-evolution.c  | 14 ++
 2 files changed, 47 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-strided-shift-1.c

diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-shift-1.c 
b/gcc/testsuite/gcc.dg/vect/vect-strided-shift-1.c
new file mode 100644
index 000..b1ce2ec
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-strided-shift-1.c
@@ -0,0 +1,33 @@
+/* PR tree-optimization/65963.  */
+#include "tree-vect.h"
+
+#define N 512
+
+int in[2*N], out[N];
+
+__attribute__ ((noinline)) void
+loop (void)
+{
+  for (int i = 0; i < N; i++)
+out[i] = in[i << 1] + 7;
+}
+
+int
+main (int argc, char **argv)
+{
+  check_vect ();
+  for (int i = 0; i < 2*N; i++)
+{
+  in[i] = i;
+  __asm__ volatile ("" : : : "memory");
+}
+  loop ();
+  __asm__ volatile ("" : : : "memory");
+  for (int i = 0; i < N; i++)
+{
+  if (out[i] != i*2 + 7)
+   abort ();
+}
+  return 0;
+}
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 1 "vect" 
{ target { vect_strided2 } } } } */
diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
index 0753bf3..8e95ddd 100644
--- a/gcc/tree-scalar-evolution.c
+++ b/gcc/tree-scalar-evolution.c
@@ -1840,6 +1840,20 @@ interpret_rhs_expr (struct loop *loop, gimple *at_stmt,
   res = chrec_fold_multiply (type, chrec1, chrec2);
   break;
 
+case LSHIFT_EXPR:
+  /* Handle A<

[PATCH, 1/6] Simplify constraint handling

2015-10-27 Thread Tom de Vries

On 27/10/15 13:24, Tom de Vries wrote:

Thinking it over a bit more, I realized the constraint handling started
to be messy. I've reworked the patch series to simplify that first.

  1Simplify constraint handling
  2Rename make_restrict_var_constraints to make_param_constraints
  3Add recursion to make_param_constraints
  4Add handle_param parameter to create_variable_info_for_1
  5Handle recursive restrict pointer in create_variable_info_for_1
  6Handle restrict struct fields recursively

Currently doing bootstrap and regtest on x86_64.

I'll repost the patch series in reply to this message.


This patch gets rid of this bit of code in intra_create_variable_infos:
...
  if (restrict_pointer_p)
make_constraint_from_global_restrict (p, "PARM_RESTRICT");
  else
..

I already proposed to remove it here ( 
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg02426.html ) but there is 
a problem with that approach: It can happen that restrict_pointer_p is 
true, but p->only_restrict_pointers is false. This happens with 
fipa-pta, when create_function_info_for created a varinfo for the 
parameter before intra_create_variable_infos was called.


The patch handles that case now by setting p->only_restrict_pointers.

Thanks,
- Tom
Simplify constraint handling

2015-10-27  Tom de Vries  

	* tree-ssa-structalias.c (intra_create_variable_infos): Simplify
	constraint handling.
---
 gcc/tree-ssa-structalias.c | 17 +++--
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index 5e070bc..4610914 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -5884,6 +5884,8 @@ intra_create_variable_infos (struct function *fn)
 	  p = create_variable_info_for_1 (t, alias_get_name (t));
 	  insert_vi_for_tree (t, p);
 	}
+  else if (restrict_pointer_p)
+	p->only_restrict_pointers = 1;
 
   /* For restrict qualified pointers build a representative for
 	 the pointed-to object.  Note that this ends up handling
@@ -5902,17 +5904,12 @@ intra_create_variable_infos (struct function *fn)
 	  continue;
 	}
 
-  if (restrict_pointer_p)
-	make_constraint_from_global_restrict (p, "PARM_RESTRICT");
-  else
+  for (; p; p = vi_next (p))
 	{
-	  for (; p; p = vi_next (p))
-	{
-	  if (p->only_restrict_pointers)
-		make_constraint_from_global_restrict (p, "PARM_RESTRICT");
-	  else if (p->may_have_pointers)
-		make_constraint_from (p, nonlocal_id);
-	}
+	  if (p->only_restrict_pointers)
+	make_constraint_from_global_restrict (p, "PARM_RESTRICT");
+	  else if (p->may_have_pointers)
+	make_constraint_from (p, nonlocal_id);
 	}
 }
 
-- 
1.9.1



[PATCH, 2/6] Rename make_restrict_var_constraints to make_param_constraints

2015-10-27 Thread Tom de Vries

On 27/10/15 13:24, Tom de Vries wrote:

Thinking it over a bit more, I realized the constraint handling started
to be messy. I've reworked the patch series to simplify that first.

  1Simplify constraint handling
  2Rename make_restrict_var_constraints to make_param_constraints
  3Add recursion to make_param_constraints
  4Add handle_param parameter to create_variable_info_for_1
  5Handle recursive restrict pointer in create_variable_info_for_1
  6Handle restrict struct fields recursively

Currently doing bootstrap and regtest on x86_64.

I'll repost the patch series in reply to this message.



This no-functional-changes patch:
- moves the one constraint handling loop left in
  intra_create_variable_infos to make_restrict_var_constraints
- renames make_restrict_var_constraints to make_param_constraints
- adds a parameter toplevel to make_param_constraints to distinguish
  between the two calling contexts

Thanks,
- Tom
Rename make_restrict_var_constraints to make_param_constraints

2015-10-27  Tom de Vries  

	* tree-ssa-structalias.c (make_restrict_var_constraints): Rename to ...
	(make_param_constraints): ... this.  Add toplevel parameter.
	(intra_create_variable_infos): Use make_param_constraints.
---
 gcc/tree-ssa-structalias.c | 29 +
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index 4610914..e88fbf0 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -5845,18 +5845,29 @@ debug_solution_for_var (unsigned int var)
   dump_solution_for_var (stderr, var);
 }
 
-/* Register the constraints for restrict var VI.  */
+/* Register the constraints for VI.  If TOPLEVEL then VI is a function
+   parameter, otherwise VI is part of a function parameter.  */
 
 static void
-make_restrict_var_constraints (varinfo_t vi)
+make_param_constraints (varinfo_t vi, bool toplevel)
 {
   for (; vi; vi = vi_next (vi))
 if (vi->may_have_pointers)
   {
 	if (vi->only_restrict_pointers)
-	  make_constraint_from_global_restrict (vi, "GLOBAL_RESTRICT");
+	  {
+	if (toplevel)
+	  make_constraint_from_global_restrict (vi, "PARM_RESTRICT");
+	else
+	  make_constraint_from_global_restrict (vi, "GLOBAL_RESTRICT");
+	  }
 	else
-	  make_copy_constraint (vi, nonlocal_id);
+	  {
+	if (toplevel)
+	  make_constraint_from (vi, nonlocal_id);
+	else
+	  make_copy_constraint (vi, nonlocal_id);
+	  }
   }
 }
 
@@ -5900,17 +5911,11 @@ intra_create_variable_infos (struct function *fn)
 	  vi->is_restrict_var = 1;
 	  insert_vi_for_tree (heapvar, vi);
 	  make_constraint_from (p, vi->id);
-	  make_restrict_var_constraints (vi);
+	  make_param_constraints (vi, false);
 	  continue;
 	}
 
-  for (; p; p = vi_next (p))
-	{
-	  if (p->only_restrict_pointers)
-	make_constraint_from_global_restrict (p, "PARM_RESTRICT");
-	  else if (p->may_have_pointers)
-	make_constraint_from (p, nonlocal_id);
-	}
+  make_param_constraints (p, true);
 }
 
   /* Add a constraint for a result decl that is passed by reference.  */
-- 
1.9.1



[PATCH] Fix PR68067

2015-10-27 Thread Richard Biener

The following patch adjusts negate_expr_p to account for the fact
that we can't generally change a - (b - c) to (c - b) + a because
-INF - 0 is ok while 0 - -INF not.  Similarly for a - (b + c).
While creating testcases I noticed that MULT_EXPR handling is bogus
as well as with -INF/2 * 2 neither operand can be negated safely.

I believe the division case is also still wrong but I refrained
from touching it with this patch.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Richard.

2015-10-27  Richard Biener  

PR middle-end/68067
* fold-const.c (negate_expr_p): We cannot negate plus or minus
if overflow is not wrapping.  Likewise multiplication unless
one operand is constant and not power of two.
(fold_negate_expr): Adjust accordingly.

* gcc.dg/torture/pr68067-1.c: New testcase.
* gcc.dg/torture/pr68067-2.c: Likewise.

Index: gcc/fold-const.c
===
--- gcc/fold-const.c(revision 229404)
+++ gcc/fold-const.c(working copy)
@@ -443,7 +443,9 @@ negate_expr_p (tree t)
 
 case PLUS_EXPR:
   if (HONOR_SIGN_DEPENDENT_ROUNDING (element_mode (type))
- || HONOR_SIGNED_ZEROS (element_mode (type)))
+ || HONOR_SIGNED_ZEROS (element_mode (type))
+ || (INTEGRAL_TYPE_P (type)
+ && ! TYPE_OVERFLOW_WRAPS (type)))
return false;
   /* -(A + B) -> (-B) - A.  */
   if (negate_expr_p (TREE_OPERAND (t, 1))
@@ -457,12 +459,23 @@ negate_expr_p (tree t)
   /* We can't turn -(A-B) into B-A when we honor signed zeros.  */
   return !HONOR_SIGN_DEPENDENT_ROUNDING (element_mode (type))
 && !HONOR_SIGNED_ZEROS (element_mode (type))
+&& (! INTEGRAL_TYPE_P (type)
+|| TYPE_OVERFLOW_WRAPS (type))
 && reorder_operands_p (TREE_OPERAND (t, 0),
TREE_OPERAND (t, 1));
 
 case MULT_EXPR:
-  if (TYPE_UNSIGNED (TREE_TYPE (t)))
-break;
+  if (TYPE_UNSIGNED (type))
+   break;
+  /* INT_MIN/n * n doesn't overflow while negating one operand it does
+ if n is a power of two.  */
+  if (INTEGRAL_TYPE_P (TREE_TYPE (t))
+ && ! TYPE_OVERFLOW_WRAPS (TREE_TYPE (t))
+ && ! ((TREE_CODE (TREE_OPERAND (t, 0)) == INTEGER_CST
+&& ! integer_pow2p (TREE_OPERAND (t, 0)))
+   || (TREE_CODE (TREE_OPERAND (t, 1)) == INTEGER_CST
+   && ! integer_pow2p (TREE_OPERAND (t, 1)
+   break;
 
   /* Fall through.  */
 
Index: gcc/testsuite/gcc.dg/torture/pr68067-1.c
===
--- gcc/testsuite/gcc.dg/torture/pr68067-1.c(revision 0)
+++ gcc/testsuite/gcc.dg/torture/pr68067-1.c(working copy)
@@ -0,0 +1,12 @@
+/* { dg-do run } */
+
+int main()
+{
+  int a = -1;
+  static int b = -2147483647 - 1;
+  static int c = 0;
+  int t = a - (b - c);
+  if (t != 2147483647)
+__builtin_abort();
+  return 0;
+}
Index: gcc/testsuite/gcc.dg/torture/pr68067-2.c
===
--- gcc/testsuite/gcc.dg/torture/pr68067-2.c(revision 0)
+++ gcc/testsuite/gcc.dg/torture/pr68067-2.c(working copy)
@@ -0,0 +1,13 @@
+/* { dg-do run } */
+
+int main()
+{
+  int a = -1;
+  static int b = -2147483647 - 1;
+  static int c = 0;
+  int t = a - (b + c*-2);
+  if (t != 2147483647)
+__builtin_abort();
+  return 0;
+}
+


Re: PING: [PATCH] PR target/67215: -fno-plt needs improvements for x86

2015-10-27 Thread Uros Bizjak
On Tue, Oct 27, 2015 at 12:37 PM, H.J. Lu  wrote:
> On Tue, Oct 27, 2015 at 4:20 AM, Bernd Schmidt  wrote:
>> On 10/19/2015 09:55 PM, H.J. Lu wrote:
>>>
>>>  * calls.c (prepare_call_address): Don't handle -fno-plt here.
>>
>>
>> Is any other target using -fno-plt? If not, and if that's really just a
>
> aarch64 is the only target which checks -fno-plt from commit log below.
> I CCed the code author.  aarch64 may suffer from the same issue.
>
>> revert I'll approve it on the condition that the x86 maintainers are happy
>> with the rest of the changes.
>
> Uros, can you take a look?

I can only say that I don't oppose the changes. The details of linking
are out of my expertise, but OTOH, you probably know these details
better than I.

So, I'll rubberstamp the patch with OK, assuming you will fix eventual fallout.

Thanks,
Uros.


Re: PING: [PATCH] PR target/67215: -fno-plt needs improvements for x86

2015-10-27 Thread Jiong Wang



On 27/10/15 11:37, H.J. Lu wrote:

On Tue, Oct 27, 2015 at 4:20 AM, Bernd Schmidt  wrote:

On 10/19/2015 09:55 PM, H.J. Lu wrote:

  * calls.c (prepare_call_address): Don't handle -fno-plt here.


Is any other target using -fno-plt? If not, and if that's really just a

aarch64 is the only target which checks -fno-plt from commit log below.
I CCed the code author.  aarch64 may suffer from the same issue.


H.J,

  Thanks for the info.

  After a quick reading of the PR at 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67215,
  I found the problem description is very x86 specific, I still don't 
understand what's
  wrong if we do the following transformation when -fno-plt specified 
on x86-74, thus I
  am not sure whether there is problem exist on aarch64. Can you please 
give more explanation?


  callproc2@PLT

  ||
  V

  movqproc2@GOTPCREL(%rip), %rax
  call*%rax


[PATCH, 3/6] Add recursion to make_param_constraints

2015-10-27 Thread Tom de Vries

On 27/10/15 13:24, Tom de Vries wrote:

Thinking it over a bit more, I realized the constraint handling started
to be messy. I've reworked the patch series to simplify that first.

  1Simplify constraint handling
  2Rename make_restrict_var_constraints to make_param_constraints
  3Add recursion to make_param_constraints
  4Add handle_param parameter to create_variable_info_for_1
  5Handle recursive restrict pointer in create_variable_info_for_1
  6Handle restrict struct fields recursively

Currently doing bootstrap and regtest on x86_64.

I'll repost the patch series in reply to this message.



This patch:
- registers the connection between a restrict pointer var and a
  restrict var in a new hash_map restrict_pointed_var.
- move the restrict pointer constraint handling from
  intra_create_variable_infos to make_param_constraints

The result of this and the two preceding patches is that the constraint 
handling for params in intra_create_variable_infos is reduced to a 
single call to make_param_constraints.


Thanks,
- Tom
Add recursion to make_param_constraints

2015-10-27  Tom de Vries  

	* tree-ssa-structalias.c (restrict_pointed_var): New static var.
	(insert_restrict_pointed_var, lookup_restrict_pointed_var): New
	function.
	(make_param_constraints): Handle case that
	lookup_restrict_pointed_var (vi) != NULL.
	(intra_create_variable_infos): Call insert_restrict_pointed_var.
	Simplify constraint handling. Delete restrict_pointed_var.
---
 gcc/tree-ssa-structalias.c | 48 ++
 1 file changed, 44 insertions(+), 4 deletions(-)

diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index e88fbf0..93bc325 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -5607,6 +5607,39 @@ check_for_overlaps (vec fieldstack)
   return false;
 }
 
+/* Map from restrict pointer variable info to restrict var variable info.  */
+
+static hash_map *restrict_pointed_var = NULL;
+
+/* Insert VI2 as the restrict var for VI in the restrict_pointed_var map.  */
+
+static void
+insert_restrict_pointed_var (varinfo_t vi, varinfo_t vi2)
+{
+  if (restrict_pointed_var == NULL)
+  restrict_pointed_var = new hash_map;
+
+  bool mapped = restrict_pointed_var->put (vi, vi2);
+  gcc_assert (!mapped);
+}
+
+/* Find the restrict var for restrict pointer VI in the restrict_pointed_var
+   map.  If VI does not exist in the map, return NULL, otherwise, return the
+   varinfo we found.  */
+
+static varinfo_t
+lookup_restrict_pointed_var (varinfo_t vi)
+{
+  if (restrict_pointed_var == NULL)
+return NULL;
+  varinfo_t *slot = restrict_pointed_var->get (vi);
+  if (slot == NULL)
+return NULL;
+
+  return *slot;
+}
+
+
 /* Create a varinfo structure for NAME and DECL, and add it to VARMAP.
This will also create any varinfo structures necessary for fields
of DECL.  */
@@ -5856,7 +5889,13 @@ make_param_constraints (varinfo_t vi, bool toplevel)
   {
 	if (vi->only_restrict_pointers)
 	  {
-	if (toplevel)
+	varinfo_t rvi = lookup_restrict_pointed_var (vi);
+	if (rvi != NULL)
+	  {
+		make_constraint_from (vi, rvi->id);
+		make_param_constraints (rvi, false);
+	  }
+	else if (toplevel)
 	  make_constraint_from_global_restrict (vi, "PARM_RESTRICT");
 	else
 	  make_constraint_from_global_restrict (vi, "GLOBAL_RESTRICT");
@@ -5910,14 +5949,15 @@ intra_create_variable_infos (struct function *fn)
 	  vi = create_variable_info_for_1 (heapvar, "PARM_NOALIAS");
 	  vi->is_restrict_var = 1;
 	  insert_vi_for_tree (heapvar, vi);
-	  make_constraint_from (p, vi->id);
-	  make_param_constraints (vi, false);
-	  continue;
+	  insert_restrict_pointed_var (p, vi);
 	}
 
   make_param_constraints (p, true);
 }
 
+  delete restrict_pointed_var;
+  restrict_pointed_var = NULL;
+
   /* Add a constraint for a result decl that is passed by reference.  */
   if (DECL_RESULT (fn->decl)
   && DECL_BY_REFERENCE (DECL_RESULT (fn->decl)))
-- 
1.9.1



[PATCH, 4/6] Add handle_param parameter to create_variable_info_for_1

2015-10-27 Thread Tom de Vries

On 27/10/15 13:24, Tom de Vries wrote:
it over a bit more, I realized the constraint handling started

to be messy. I've reworked the patch series to simplify that first.

  1Simplify constraint handling
  2Rename make_restrict_var_constraints to make_param_constraints
  3Add recursion to make_param_constraints
  4Add handle_param parameter to create_variable_info_for_1
  5Handle recursive restrict pointer in create_variable_info_for_1
  6Handle restrict struct fields recursively

Currently doing bootstrap and regtest on x86_64.

I'll repost the patch series in reply to this message.


A repost of the patch at 
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg02776.html .


I've changed the structure a bit such that it's clear that the remaining 
'build_fake_var_decl' bit is in 'the lookup_vi_for_tree (t) != NULL' branch.


Thanks,
- Tom

Add handle_param parameter to create_variable_info_for_1

2015-10-26  Tom de Vries  

	* tree-ssa-structalias.c (create_variable_info_for_1): Add and handle
	handle_param parameter.
	(create_variable_info_for): Call create_variable_info_for_1 with extra
	arg.
	(intra_create_variable_infos): Same.  Handle case that
	lookup_restrict_pointed_var (p) is not NULL.
---
 gcc/tree-ssa-structalias.c | 53 --
 1 file changed, 32 insertions(+), 21 deletions(-)

diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index 93bc325..a639944 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -5642,10 +5642,10 @@ lookup_restrict_pointed_var (varinfo_t vi)
 
 /* Create a varinfo structure for NAME and DECL, and add it to VARMAP.
This will also create any varinfo structures necessary for fields
-   of DECL.  */
+   of DECL.  DECL is a function parameter if HANDLE_PARAM is set.  */
 
 static varinfo_t
-create_variable_info_for_1 (tree decl, const char *name)
+create_variable_info_for_1 (tree decl, const char *name, bool handle_param)
 {
   varinfo_t vi, newvi;
   tree decl_type = TREE_TYPE (decl);
@@ -5721,6 +5721,18 @@ create_variable_info_for_1 (tree decl, const char *name)
   if (POINTER_TYPE_P (TREE_TYPE (decl))
 	  && TYPE_RESTRICT (TREE_TYPE (decl)))
 	vi->only_restrict_pointers = 1;
+  if (vi->only_restrict_pointers
+	  && !type_contains_placeholder_p (TREE_TYPE (TREE_TYPE (decl)))
+	  && handle_param)
+	{
+	  varinfo_t rvi;
+	  tree heapvar = build_fake_var_decl (TREE_TYPE (decl_type));
+	  DECL_EXTERNAL (heapvar) = 1;
+	  rvi = create_variable_info_for_1 (heapvar, "PARM_NOALIAS", false);
+	  rvi->is_restrict_var = 1;
+	  insert_vi_for_tree (heapvar, rvi);
+	  insert_restrict_pointed_var (vi, rvi);
+	}
   fieldstack.release ();
   return vi;
 }
@@ -5772,7 +5784,7 @@ create_variable_info_for_1 (tree decl, const char *name)
 static unsigned int
 create_variable_info_for (tree decl, const char *name)
 {
-  varinfo_t vi = create_variable_info_for_1 (decl, name);
+  varinfo_t vi = create_variable_info_for_1 (decl, name, false);
   unsigned int id = vi->id;
 
   insert_vi_for_tree (decl, vi);
@@ -5925,31 +5937,30 @@ intra_create_variable_infos (struct function *fn)
 {
   bool restrict_pointer_p = (POINTER_TYPE_P (TREE_TYPE (t))
  && TYPE_RESTRICT (TREE_TYPE (t)));
-  bool recursive_restrict_p
-	= (restrict_pointer_p
-	   && !type_contains_placeholder_p (TREE_TYPE (TREE_TYPE (t;
   varinfo_t p = lookup_vi_for_tree (t);
   if (p == NULL)
 	{
-	  p = create_variable_info_for_1 (t, alias_get_name (t));
+	  p = create_variable_info_for_1 (t, alias_get_name (t), true);
 	  insert_vi_for_tree (t, p);
 	}
   else if (restrict_pointer_p)
-	p->only_restrict_pointers = 1;
-
-  /* For restrict qualified pointers build a representative for
-	 the pointed-to object.  Note that this ends up handling
-	 out-of-bound references conservatively by aggregating them
-	 in the first/last subfield of the object.  */
-  if (recursive_restrict_p)
 	{
-	  varinfo_t vi;
-	  tree heapvar = build_fake_var_decl (TREE_TYPE (TREE_TYPE (t)));
-	  DECL_EXTERNAL (heapvar) = 1;
-	  vi = create_variable_info_for_1 (heapvar, "PARM_NOALIAS");
-	  vi->is_restrict_var = 1;
-	  insert_vi_for_tree (heapvar, vi);
-	  insert_restrict_pointed_var (p, vi);
+	  p->only_restrict_pointers = 1;
+
+	  /* For restrict qualified pointers build a representative for
+	 the pointed-to object.  Note that this ends up handling
+	 out-of-bound references conservatively by aggregating them
+	 in the first/last subfield of the object.  */
+	  if (!type_contains_placeholder_p (TREE_TYPE (TREE_TYPE (t
+	{
+	  tree heapvar = build_fake_var_decl (TREE_TYPE (TREE_TYPE (t)));
+	  DECL_EXTERNAL (heapvar) = 1;
+	  varinfo_t vi
+		= create_variable_info_for_1 (heapvar, "PARM_NOALIAS", false);
+	  vi->is_restrict_var = 1;
+	  insert_vi_for_tree (heapvar, vi);
+	  insert_restrict_pointed_var (p, vi);
+	}
 	}
 
   make_pa

Re: PING: [PATCH] PR target/67215: -fno-plt needs improvements for x86

2015-10-27 Thread H.J. Lu
On Tue, Oct 27, 2015 at 5:52 AM, Jiong Wang  wrote:
>
>
> On 27/10/15 11:37, H.J. Lu wrote:
>>
>> On Tue, Oct 27, 2015 at 4:20 AM, Bernd Schmidt 
>> wrote:
>>>
>>> On 10/19/2015 09:55 PM, H.J. Lu wrote:

   * calls.c (prepare_call_address): Don't handle -fno-plt here.
>>>
>>>
>>> Is any other target using -fno-plt? If not, and if that's really just a
>>
>> aarch64 is the only target which checks -fno-plt from commit log below.
>> I CCed the code author.  aarch64 may suffer from the same issue.
>
>
> H.J,
>
>   Thanks for the info.
>
>   After a quick reading of the PR at
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67215,
>   I found the problem description is very x86 specific, I still don't
> understand what's
>   wrong if we do the following transformation when -fno-plt specified on
> x86-74, thus I
>   am not sure whether there is problem exist on aarch64. Can you please give
> more explanation?
>
>   callproc2@PLT
>
>   ||
>   V
>
>   movqproc2@GOTPCREL(%rip), %rax
>   call*%rax

call *proc2@GOTPCREL(%rip)

doesn't use a register and saves one instruction.

-- 
H.J.


[PATCH, 5/6] Handle recursive restrict pointer in create_variable_info_for_1

2015-10-27 Thread Tom de Vries

On 27/10/15 13:24, Tom de Vries wrote:

Thinking it over a bit more, I realized the constraint handling started
to be messy. I've reworked the patch series to simplify that first.

  1Simplify constraint handling
  2Rename make_restrict_var_constraints to make_param_constraints
  3Add recursion to make_param_constraints
  4Add handle_param parameter to create_variable_info_for_1
  5Handle recursive restrict pointer in create_variable_info_for_1
  6Handle restrict struct fields recursively

Currently doing bootstrap and regtest on x86_64.

I'll repost the patch series in reply to this message.


A repost of the patch at 
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg02779.html .


With the constraint handling dealt with earlier, it has become rather 
minimal.


Thanks,
- Tom

Handle recursive restrict pointer in create_variable_info_for_1

2015-10-26  Tom de Vries  

	* tree-ssa-structalias.c (create_variable_info_for_1): Enable recursive
	handling of restrict pointers.

	* gcc.dg/tree-ssa/restrict-7.c: New test.
---
 gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c | 12 
 gcc/tree-ssa-structalias.c |  2 +-
 2 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c b/gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c
new file mode 100644
index 000..f7a68c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/restrict-7.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-fre1" } */
+
+int
+f (int *__restrict__ *__restrict__ *__restrict__ a, int *b)
+{
+  *b = 1;
+  ***a  = 2;
+  return *b;
+}
+
+/* { dg-final { scan-tree-dump-times "return 1" 1 "fre1" } } */
diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index a639944..a297a36 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -5728,7 +5728,7 @@ create_variable_info_for_1 (tree decl, const char *name, bool handle_param)
 	  varinfo_t rvi;
 	  tree heapvar = build_fake_var_decl (TREE_TYPE (decl_type));
 	  DECL_EXTERNAL (heapvar) = 1;
-	  rvi = create_variable_info_for_1 (heapvar, "PARM_NOALIAS", false);
+	  rvi = create_variable_info_for_1 (heapvar, "PARM_NOALIAS", true);
 	  rvi->is_restrict_var = 1;
 	  insert_vi_for_tree (heapvar, rvi);
 	  insert_restrict_pointed_var (vi, rvi);
-- 
1.9.1



[PATCH, 6/6] Handle restrict struct fields recursively

2015-10-27 Thread Tom de Vries

On 27/10/15 13:24, Tom de Vries wrote:

Thinking it over a bit more, I realized the constraint handling started
to be messy. I've reworked the patch series to simplify that first.

  1Simplify constraint handling
  2Rename make_restrict_var_constraints to make_param_constraints
  3Add recursion to make_param_constraints
  4Add handle_param parameter to create_variable_info_for_1
  5Handle recursive restrict pointer in create_variable_info_for_1
  6Handle restrict struct fields recursively

Currently doing bootstrap and regtest on x86_64.

I'll repost the patch series in reply to this message.



A repost of the patch at 
https://gcc.gnu.org/ml/gcc-patches/2015-10/msg02838.html .


Thanks,
- Tom

Handle restrict struct fields recursively

2015-10-26  Tom de Vries  

	* tree-ssa-structalias.c (struct fieldoff): Add restrict_var field.
	(push_fields_onto_fieldstack): Add and handle handle_param parameter.
	(create_variable_info_for_1): Add extra arg to call to
	push_fields_onto_fieldstack.  Handle restrict pointer fields.

	* gcc.dg/tree-ssa/restrict-8.c: New test.
---
 gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c | 17 
 gcc/tree-ssa-structalias.c | 32 +-
 2 files changed, 44 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c b/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c
new file mode 100644
index 000..b0ab164
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/restrict-8.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-fre1" } */
+
+struct s
+{
+  int *__restrict__ *__restrict__ pp;
+};
+
+int
+f (struct s s, int *b)
+{
+  *b = 1;
+  **s.pp = 2;
+  return *b;
+}
+
+/* { dg-final { scan-tree-dump-times "return 1" 1 "fre1" } } */
diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index a297a36..fac1739 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -399,6 +399,7 @@ new_var_info (tree t, const char *name)
   return ret;
 }
 
+static varinfo_t create_variable_info_for_1 (tree, const char *, bool);
 
 /* A map mapping call statements to per-stmt variables for uses
and clobbers specific to the call.  */
@@ -5196,6 +5197,8 @@ struct fieldoff
   unsigned may_have_pointers : 1;
 
   unsigned only_restrict_pointers : 1;
+
+  varinfo_t restrict_var;
 };
 typedef struct fieldoff fieldoff_s;
 
@@ -5290,11 +5293,12 @@ field_must_have_pointers (tree t)
OFFSET is used to keep track of the offset in this entire
structure, rather than just the immediately containing structure.
Returns false if the caller is supposed to handle the field we
-   recursed for.  */
+   recursed for.  If HANDLE_PARAM is set, we're handling part of a function
+   parameter.  */
 
 static bool
 push_fields_onto_fieldstack (tree type, vec *fieldstack,
-			 HOST_WIDE_INT offset)
+			 HOST_WIDE_INT offset, bool handle_param)
 {
   tree field;
   bool empty_p = true;
@@ -5320,7 +5324,7 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack,
 	|| TREE_CODE (field_type) == UNION_TYPE)
 	  push = true;
 	else if (!push_fields_onto_fieldstack
-		(field_type, fieldstack, offset + foff)
+		(field_type, fieldstack, offset + foff, handle_param)
 		 && (DECL_SIZE (field)
 		 && !integer_zerop (DECL_SIZE (field
 	  /* Empty structures may have actual size, like in C++.  So
@@ -5341,7 +5345,8 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack,
 	if (!pair
 		&& offset + foff != 0)
 	  {
-		fieldoff_s e = {0, offset + foff, false, false, false, false};
+		fieldoff_s e = {0, offset + foff, false, false, false, false,
+NULL};
 		pair = fieldstack->safe_push (e);
 	  }
 
@@ -5375,6 +5380,19 @@ push_fields_onto_fieldstack (tree type, vec *fieldstack,
 		  = (!has_unknown_size
 		 && POINTER_TYPE_P (field_type)
 		 && TYPE_RESTRICT (field_type));
+		if (handle_param
+		&& e.only_restrict_pointers
+		&& !type_contains_placeholder_p (TREE_TYPE (field_type)))
+		  {
+		varinfo_t rvi;
+		tree heapvar = build_fake_var_decl (TREE_TYPE (field_type));
+		DECL_EXTERNAL (heapvar) = 1;
+		rvi = create_variable_info_for_1 (heapvar, "PARM_NOALIAS",
+		  true);
+		rvi->is_restrict_var = 1;
+		insert_vi_for_tree (heapvar, rvi);
+		e.restrict_var = rvi;
+		  }
 		fieldstack->safe_push (e);
 	  }
 	  }
@@ -5679,7 +5697,7 @@ create_variable_info_for_1 (tree decl, const char *name, bool handle_param)
   bool notokay = false;
   unsigned int i;
 
-  push_fields_onto_fieldstack (decl_type, &fieldstack, 0);
+  push_fields_onto_fieldstack (decl_type, &fieldstack, 0, handle_param);
 
   for (i = 0; !notokay && fieldstack.iterate (i, &fo); i++)
 	if (fo->has_unknown_size
@@ -5770,6 +5788,10 @@ create_variable_info_for_1 (tree decl, const char *name, bool handle_param)

[PATCH] Add -fchecking

2015-10-27 Thread Richard Biener

This adds -fchecking as a way to enable internal consistency checks
even in release builds (or disable checking with -fno-checking - up to
a certain extent - with checking enabled).

Bootstrap & regtest running on x86_64-unknown-linux-gnu.

Richard.

2015-10-27  Richard Biener  

* common.opt (fchecking): New flag controlling flag_checking.
* passes.c (execute_function_todo): Guard checking code with
flag_checking instead of ENABLE_CHECKING.
(execute_todo): Likewise.
(execute_one_pass): Likewise.
(verify_curr_properties): Always compile.
* timevar.c: Include options.h.
(timer::print): Adjust output.
* doc/invoke.texi (fchecking): Document.

Index: gcc/common.opt
===
*** gcc/common.opt  (revision 229404)
--- gcc/common.opt  (working copy)
*** int optimize_fast
*** 46,56 
  Variable
  bool in_lto_p = false
  
- ; Enable additional checks of internal state consistency, which may slow
- ; the compiler down.
- Variable
- bool flag_checking = CHECKING_P
- 
  ; 0 means straightforward implementation of complex divide acceptable.
  ; 1 means wide ranges of inputs must work for complex divide.
  ; 2 means C99-like requirements for complex multiply and divide.
--- 46,51 
*** fcheck-new
*** 1002,1007 
--- 997,1006 
  Common Var(flag_check_new)
  Check the return value of new in C++.
  
+ fchecking
+ Common Var(flag_checking) Init(CHECKING_P)
+ Perform internal consistency checkings.
+ 
  fcombine-stack-adjustments
  Common Report Var(flag_combine_stack_adjustments) Optimization
  Looks for opportunities to reduce stack adjustments and stack references.
Index: gcc/passes.c
===
*** gcc/passes.c(revision 229404)
--- gcc/passes.c(working copy)
*** execute_function_todo (function *fn, voi
*** 1952,1960 
  
gcc_assert (dom_info_state (fn, CDI_POST_DOMINATORS) == DOM_NONE);
/* If we've seen errors do not bother running any verifiers.  */
!   if (!seen_error ())
  {
- #if defined ENABLE_CHECKING
dom_state pre_verify_state = dom_info_state (fn, CDI_DOMINATORS);
dom_state pre_verify_pstate = dom_info_state (fn, CDI_POST_DOMINATORS);
  
--- 1952,1960 
  
gcc_assert (dom_info_state (fn, CDI_POST_DOMINATORS) == DOM_NONE);
/* If we've seen errors do not bother running any verifiers.  */
!   if (!seen_error ()
!   && flag_checking)
  {
dom_state pre_verify_state = dom_info_state (fn, CDI_DOMINATORS);
dom_state pre_verify_pstate = dom_info_state (fn, CDI_POST_DOMINATORS);
  
*** execute_function_todo (function *fn, voi
*** 1988,1994 
/* Make sure verifiers don't change dominator state.  */
gcc_assert (dom_info_state (fn, CDI_DOMINATORS) == pre_verify_state);
gcc_assert (dom_info_state (fn, CDI_POST_DOMINATORS) == 
pre_verify_pstate);
- #endif
  }
  
fn->last_verified = flags & TODO_verify_all;
--- 1991,1996 
*** execute_function_todo (function *fn, voi
*** 2008,2018 
  static void
  execute_todo (unsigned int flags)
  {
! #if defined ENABLE_CHECKING
!   if (cfun
&& need_ssa_update_p (cfun))
  gcc_assert (flags & TODO_update_ssa_any);
- #endif
  
timevar_push (TV_TODO);
  
--- 2010,2019 
  static void
  execute_todo (unsigned int flags)
  {
!   if (flag_checking
!   && cfun
&& need_ssa_update_p (cfun))
  gcc_assert (flags & TODO_update_ssa_any);
  
timevar_push (TV_TODO);
  
*** clear_last_verified (function *fn, void
*** 2076,2089 
  /* Helper function. Verify that the properties has been turn into the
 properties expected by the pass.  */
  
- #ifdef ENABLE_CHECKING
  static void
  verify_curr_properties (function *fn, void *data)
  {
unsigned int props = (size_t)data;
gcc_assert ((fn->curr_properties & props) == props);
  }
- #endif
  
  /* Initialize pass dump file.  */
  /* This is non-static so that the plugins can use it.  */
--- 2077,2088 
*** execute_one_pass (opt_pass *pass)
*** 2331,2344 
/* Run pre-pass verification.  */
execute_todo (pass->todo_flags_start);
  
! #ifdef ENABLE_CHECKING
!   do_per_function (verify_curr_properties,
!  (void *)(size_t)pass->properties_required);
! #endif
  
/* If a timevar is present, start it.  */
if (pass->tv_id != TV_NONE)
! timevar_push (pass->tv_id);
  
/* Do it!  */
todo_after = pass->execute (cfun);
--- 2330,2342 
/* Run pre-pass verification.  */
execute_todo (pass->todo_flags_start);
  
!   if (flag_checking)
! do_per_function (verify_curr_properties,
!(void *)(size_t)pass->properties_required);
  
/* If a timevar is present, start it.  */
if (pass->tv_id != TV_NONE)
! timevar_push (pass->tv_id);
  

Re: [Patch] Avoid is_simple_use bug in vectorizable_live_operation

2015-10-27 Thread Alan Hayward


On 27/10/2015 11:36, "Richard Biener"  wrote:

>On Mon, Oct 26, 2015 at 6:15 PM, Alan Hayward 
>wrote:
>>
>>
>> On 26/10/2015 13:35, "Richard Biener" 
>>wrote:
>>
>>>On Mon, Oct 26, 2015 at 1:33 PM, Alan Hayward 
>>>wrote:
 There is a potential bug in vectorizable_live_operation.

 Consider the case where the first op for stmt is valid, but the second
is
 null.
 The first time through the for () loop, it will call out to
 vect_is_simple_use () which will set dt.
 The second time, because op is null, vect_is_simple_use () will not be
 called.
 However, dt is still set to a valid value, therefore the loop will
 continue.

 Note this is different from the case where the first op is null, which
 will cause the loop not call vect_is_simple_use () and then return
false.

 It is possible that this was intentional, however that is not clear
from
 the code.

 The fix is to simply ensure dt is initialized to a default value on
each
 iteration.
>>>
>>>I think the patch is a strict improvement, thus OK.  Still a NULL
>>>operand
>>>is not possible in GIMPLE so the op && check is not necessary.  The way
>>>it iterates over all stmt uses is a bit scary anyway.  As it is ok with
>>>all invariants it should probably simply do sth like
>>>
>>>   FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
>>> if (!vect_is_simple_use (op, ))
>>>
>>>and be done with that.  Unvisited uses can only be constants (ok).
>>>
>>>Care to rework the funtion like that if you are here?
>>>
>>
>> Ok, I’ve updated as requested.
>
>Ok.  Please remove
>
>  code = gimple_assign_rhs_code (stmt);
>  op_type = TREE_CODE_LENGTH (code);
>  rhs_class = get_gimple_rhs_class (code);
>  gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
>  gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);
>
>and associated variables as I belive otherwise a --disable-checking build
>will fail with set-but-not-used warnings.  And the asserts are quite
>pointless
>given the now sanitized loop.
>

I did consider removing those asserts, but didn’t want to remove any
important checks. Didn’t think about the disable-checking build.
Anyway, new patch attached.


Cheers,
Alan.



avoid_issimple3.patch
Description: Binary data


Re: [mask conversion, patch 1/2] Add pattern for mask conversions

2015-10-27 Thread Richard Biener
On Mon, Oct 19, 2015 at 2:22 PM, Ilya Enkovich  wrote:
> Hi,
>
> This patch adds a vectorization pattern which detects cases where mask 
> conversion is needed and adds it.  It is done for all statements which may 
> consume mask.  Some additional changes were made to support MASK_LOAD with 
> pattern and allow scalar mode for vectype of pattern stmt.  It is applied on 
> top of all other boolean vector series.  Does it look OK?

Ick - more patterns.  I was really hoping we could get away without
patterns for bools :/

Ok.

Thanks,
Richard.

> Thanks,
> Ilya
> --
> gcc/
>
> 2015-10-19  Ilya Enkovich  
>
> * optabs.c (expand_binop_directly): Allow scalar mode for
> vec_pack_trunc_optab.
> * tree-vect-loop.c (vect_determine_vectorization_factor): Skip
> boolean vector producers from pattern sequence when computing VF.
> * tree-vect-patterns.c (vect_vect_recog_func_ptrs) Add
> vect_recog_mask_conversion_pattern.
> (search_type_for_mask): Choose the smallest
> type if different size types are mixed.
> (build_mask_conversion): New.
> (vect_recog_mask_conversion_pattern): New.
> (vect_pattern_recog_1): Allow scalar mode for boolean vectype.
> * tree-vect-stmts.c (vectorizable_mask_load_store): Support masked
> load with pattern.
> (vectorizable_conversion): Support boolean vectors.
> (free_stmt_vec_info): Allow patterns for statements with no lhs.
> * tree-vectorizer.h (NUM_PATTERNS): Increase to 14.
>
>
> diff --git a/gcc/optabs.c b/gcc/optabs.c
> index 83f4be3..8d61d33 100644
> --- a/gcc/optabs.c
> +++ b/gcc/optabs.c
> @@ -1055,7 +1055,8 @@ expand_binop_directly (machine_mode mode, optab 
> binoptab,
>/* The mode of the result is different then the mode of the
>  arguments.  */
>tmp_mode = insn_data[(int) icode].operand[0].mode;
> -  if (GET_MODE_NUNITS (tmp_mode) != 2 * GET_MODE_NUNITS (mode))
> +  if (VECTOR_MODE_P (mode)
> + && GET_MODE_NUNITS (tmp_mode) != 2 * GET_MODE_NUNITS (mode))
> {
>   delete_insns_since (last);
>   return NULL_RTX;
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index 14804b3..e388533 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -497,6 +497,17 @@ vect_determine_vectorization_factor (loop_vec_info 
> loop_vinfo)
> }
>  }
>
> + /* Boolean vectors don't affect VF.  */
> + if (VECTOR_BOOLEAN_TYPE_P (vectype))
> +   {
> + if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
> +   {
> + pattern_def_seq = NULL;
> + gsi_next (&si);
> +   }
> + continue;
> +   }
> +
>   /* The vectorization factor is according to the smallest
>  scalar type (or the largest vector size, but we only
>  support one vector size per loop).  */
> diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
> index a737129..34b1ea6 100644
> --- a/gcc/tree-vect-patterns.c
> +++ b/gcc/tree-vect-patterns.c
> @@ -76,6 +76,7 @@ static gimple *vect_recog_mult_pattern (vec *,
>  static gimple *vect_recog_mixed_size_cond_pattern (vec *,
>   tree *, tree *);
>  static gimple *vect_recog_bool_pattern (vec *, tree *, tree *);
> +static gimple *vect_recog_mask_conversion_pattern (vec *, tree *, 
> tree *);
>  static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
> vect_recog_widen_mult_pattern,
> vect_recog_widen_sum_pattern,
> @@ -89,7 +90,8 @@ static vect_recog_func_ptr 
> vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
> vect_recog_divmod_pattern,
> vect_recog_mult_pattern,
> vect_recog_mixed_size_cond_pattern,
> -   vect_recog_bool_pattern};
> +   vect_recog_bool_pattern,
> +   vect_recog_mask_conversion_pattern};
>
>  static inline void
>  append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
> @@ -3180,7 +3182,7 @@ search_type_for_mask (tree var, vec_info *vinfo)
>enum vect_def_type dt;
>tree rhs1;
>enum tree_code rhs_code;
> -  tree res = NULL;
> +  tree res = NULL, res2;
>
>if (TREE_CODE (var) != SSA_NAME)
>  return NULL;
> @@ -3213,13 +3215,26 @@ search_type_for_mask (tree var, vec_info *vinfo)
>  case BIT_AND_EXPR:
>  case BIT_IOR_EXPR:
>  case BIT_XOR_EXPR:
> -  if (!(res = search_type_for_mask (rhs1, vinfo)))
> -   res = search_type_for_mask (gimple_assign_rhs2 (def_stmt), vinfo);
> +  res = search_type_for_mask (rhs1, vinfo);
> +  res2 = search_type_for_mask (gimple_assign_rhs2 (def_stmt), vinfo);
> +  if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
> +   res = res2;
>break;
>
>  default:
>if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
> {
> + tree comp_vectype, mask_type;
> +
> 

Re: [OpenACC 1/11] UNIQUE internal function

2015-10-27 Thread Richard Biener
On Tue, Oct 27, 2015 at 9:03 AM, Jakub Jelinek  wrote:
> On Mon, Oct 26, 2015 at 03:32:45PM -0700, Nathan Sidwell wrote:
>> Richard, Jakub,
>> this updates patch 1 to use the target-insns.def mechanism of detecting
>> conditionally-implemented instructions.  Otherwise it's the same as
>> yesterday's patch.  To recap:
>>
>> 1) Moved the subcodes to an enumeration in internal-fn.h
>>
>> 2) Remove ECF_LEAF
>>
>> 3) Added check in initialize_ctrl_altering
>>
>> 4) tracer code now (continues) to only look in last stmt of block
>>
>> I looked at fnsplit and do not believe I need changes there.  That's
>> changing things like:
>>   if (cheap test)
>> do cheap thing
>>   else
>> do complex thing
>>
>> to break out the else part into a separate function.   That's fine -- it'll
>> copy the whole CFG of interest.
>
> The question is if some UNIQUE call could be ever considered as part of the
> cheap test or do cheap thing.  If not, everything is fine of course for
> fnsplit.
>
>> ok?
>
> Ok for me, but please wait for Richi's ack too.

+  /* An IFN_UNIQUE call must be duplicated as part of its group,
+or not at all.  */
+  if (is_gimple_call (g) && gimple_call_internal_p (g)
+ && gimple_call_internal_unique_p (g))

&&s always to the next line

Otherwise looks ok to me now.

Thanks,
Richard.

> Jakub


[PATCH] Fix PR68104

2015-10-27 Thread Richard Biener

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2015-10-27  Richard Biener  

PR tree-optimization/68104
* tree-vect-data-refs.c (vect_compute_data_ref_alignment): Move
strided access check ...
(vect_compute_data_refs_alignment): ... here.

* gcc.dg/torture/pr68104.c: New testcase.

Index: gcc/tree-vect-data-refs.c
===
*** gcc/tree-vect-data-refs.c   (revision 229404)
--- gcc/tree-vect-data-refs.c   (working copy)
*** vect_compute_data_ref_alignment (struct
*** 629,640 
/* Initialize misalignment to unknown.  */
SET_DR_MISALIGNMENT (dr, -1);
  
-   /* Strided accesses perform only component accesses, misalignment 
information
-  is irrelevant for them.  */
-   if (STMT_VINFO_STRIDED_P (stmt_info)
-   && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
- return true;
- 
if (tree_fits_shwi_p (DR_STEP (dr)))
  misalign = DR_INIT (dr);
aligned_to = DR_ALIGNED_TO (dr);
--- 651,656 
*** vect_compute_data_refs_alignment (vec_in
*** 794,811 
unsigned int i;
  
FOR_EACH_VEC_ELT (datarefs, i, dr)
! if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
! && !vect_compute_data_ref_alignment (dr))
!   {
! if (is_a  (vinfo))
!   {
! /* Mark unsupported statement as unvectorizable.  */
! STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
! continue;
!   }
! else
!   return false;
!   }
  
return true;
  }
--- 810,836 
unsigned int i;
  
FOR_EACH_VEC_ELT (datarefs, i, dr)
! {
!   stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
!   if (STMT_VINFO_VECTORIZABLE (stmt_info)
! && !vect_compute_data_ref_alignment (dr))
!   {
! /* Strided accesses perform only component accesses, misalignment
!information is irrelevant for them.  */
! if (STMT_VINFO_STRIDED_P (stmt_info)
! && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
!   continue;
! 
! if (is_a  (vinfo))
!   {
! /* Mark unsupported statement as unvectorizable.  */
! STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
! continue;
!   }
! else
!   return false;
!   }
! }
  
return true;
  }
Index: gcc/testsuite/gcc.dg/torture/pr68104.c
===
*** gcc/testsuite/gcc.dg/torture/pr68104.c  (revision 0)
--- gcc/testsuite/gcc.dg/torture/pr68104.c  (working copy)
***
*** 0 
--- 1,22 
+ /* { dg-do compile } */
+ 
+ typedef struct
+ {
+   char vl;
+   char weight;
+ } ib_vl_arb_element_t;
+ typedef struct { ib_vl_arb_element_t vl_entry[32]; } ib_vl_arb_table_t;
+ typedef enum { IB_SUCCESS } ib_api_status_t;
+ int a, b, d;
+ char c;
+ void fn1();
+ ib_api_status_t fn2()
+ {
+   int e = b;
+   ib_vl_arb_table_t f;
+   if (e)
+ for (a = 0; a < d; a++)
+   f.vl_entry[a].vl &= c;
+   fn1(f);
+   return IB_SUCCESS;
+ }


Re: [Patch] Avoid is_simple_use bug in vectorizable_live_operation

2015-10-27 Thread Richard Biener
On Tue, Oct 27, 2015 at 2:35 PM, Alan Hayward  wrote:
>
>
> On 27/10/2015 11:36, "Richard Biener"  wrote:
>
>>On Mon, Oct 26, 2015 at 6:15 PM, Alan Hayward 
>>wrote:
>>>
>>>
>>> On 26/10/2015 13:35, "Richard Biener" 
>>>wrote:
>>>
On Mon, Oct 26, 2015 at 1:33 PM, Alan Hayward 
wrote:
> There is a potential bug in vectorizable_live_operation.
>
> Consider the case where the first op for stmt is valid, but the second
>is
> null.
> The first time through the for () loop, it will call out to
> vect_is_simple_use () which will set dt.
> The second time, because op is null, vect_is_simple_use () will not be
> called.
> However, dt is still set to a valid value, therefore the loop will
> continue.
>
> Note this is different from the case where the first op is null, which
> will cause the loop not call vect_is_simple_use () and then return
>false.
>
> It is possible that this was intentional, however that is not clear
>from
> the code.
>
> The fix is to simply ensure dt is initialized to a default value on
>each
> iteration.

I think the patch is a strict improvement, thus OK.  Still a NULL
operand
is not possible in GIMPLE so the op && check is not necessary.  The way
it iterates over all stmt uses is a bit scary anyway.  As it is ok with
all invariants it should probably simply do sth like

   FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
 if (!vect_is_simple_use (op, ))

and be done with that.  Unvisited uses can only be constants (ok).

Care to rework the funtion like that if you are here?

>>>
>>> Ok, I’ve updated as requested.
>>
>>Ok.  Please remove
>>
>>  code = gimple_assign_rhs_code (stmt);
>>  op_type = TREE_CODE_LENGTH (code);
>>  rhs_class = get_gimple_rhs_class (code);
>>  gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
>>  gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);
>>
>>and associated variables as I belive otherwise a --disable-checking build
>>will fail with set-but-not-used warnings.  And the asserts are quite
>>pointless
>>given the now sanitized loop.
>>
>
> I did consider removing those asserts, but didn’t want to remove any
> important checks. Didn’t think about the disable-checking build.
> Anyway, new patch attached.

Ok.

Richard.

>
> Cheers,
> Alan.
>


Re: PING: [PATCH] PR target/67215: -fno-plt needs improvements for x86

2015-10-27 Thread Jiong Wang



On 27/10/15 13:06, H.J. Lu wrote:

On Tue, Oct 27, 2015 at 5:52 AM, Jiong Wang  wrote:


On 27/10/15 11:37, H.J. Lu wrote:

On Tue, Oct 27, 2015 at 4:20 AM, Bernd Schmidt 
wrote:

On 10/19/2015 09:55 PM, H.J. Lu wrote:

   * calls.c (prepare_call_address): Don't handle -fno-plt here.


Is any other target using -fno-plt? If not, and if that's really just a

aarch64 is the only target which checks -fno-plt from commit log below.
I CCed the code author.  aarch64 may suffer from the same issue.


H.J,

   Thanks for the info.

   After a quick reading of the PR at
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67215,
   I found the problem description is very x86 specific, I still don't
understand what's
   wrong if we do the following transformation when -fno-plt specified on
x86-74, thus I
   am not sure whether there is problem exist on aarch64. Can you please give
more explanation?

   callproc2@PLT

   ||
   V

   movqproc2@GOTPCREL(%rip), %rax
   call*%rax

call *proc2@GOTPCREL(%rip)

doesn't use a register and saves one instruction.


OK, then it's fairly x86-64 specific optimization, because we can't do 
"call *mem" in

aarch64 and some other targets.


[PATCH][ARM] Fix costing of vmul+vcvt combine pattern

2015-10-27 Thread Kyrill Tkachov

Hi all,

This patch allows us to handle the *combine_vcvtf2i pattern in rtx costs by 
properly identifying it
as a toint coversion. Before this I saw a pattern like:
(set (reg/i:SI 0 r0)
(fix:SI (fix:SF (mult:SF (reg:SF 16 s0 [ a ])
(const_double:SF 3.2e+1 [0x0.8p+6])

being assigned a cost of 40 because the costs blindly recursed into the 
operands.
With this patch for -mcpu=cortex-a57 I see it being assigned a cost of 4.

Bootstrapped and tested on arm-none-linux-gnueabihf.

Ok for trunk?

Thanks,
Kyrill

2015-10-27  Kyrylo Tkachov  

* config/arm/arm.c (arm_new_rtx_costs, FIX case): Handle
combine_vcvtf2i pattern.
commit 1e040710d1022ce816eac9b4f6065bc7aa2be9cf
Author: Kyrylo Tkachov 
Date:   Wed Oct 14 11:26:07 2015 +0100

[ARM] Fix costing of vmul+vcvt combine pattern

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index b37b507..33ad433 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -11064,6 +11064,23 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
 case UNSIGNED_FIX:
   if (TARGET_HARD_FLOAT)
 	{
+	  /* The *combine_vcvtf2i reduces a vmul+vcvt into
+	 a vcvt fixed-point conversion.  */
+	  if (code == FIX && mode == SImode
+	  && GET_CODE (XEXP (x, 0)) == FIX
+	  && GET_MODE (XEXP (x, 0)) == SFmode
+	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
+	  && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
+		 > 0)
+	{
+	  if (speed_p)
+		*cost += extra_cost->fp[0].toint;
+
+	  *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
+ code, 0, speed_p);
+	  return true;
+	}
+
 	  if (GET_MODE_CLASS (mode) == MODE_INT)
 	{
 	  mode = GET_MODE (XEXP (x, 0));


Re: [PATCH v4] SH FDPIC backend support

2015-10-27 Thread Oleg Endo
On Mon, 2015-10-26 at 22:47 -0400, Rich Felker wrote:
> On Sun, Oct 25, 2015 at 11:28:51PM +0900, Oleg Endo wrote:
> > On Fri, 2015-10-23 at 02:32 -0400, Rich Felker wrote:
> > > Here's my updated version of the FDPIC patch with all requested
> > > changes made and Changelog added. I've included all the original
> > > authors. This is my first time writing such an extensive
> > > Changelog
> > > entry so please let me know if there are things I got wrong.
> > 
> > I took the liberty and fixed some minor formatting trivia and
> > extracted
> > functions sh_emit_storesi and sh_emit_storehi which are used in
> >  sh_trampoline_init to effectively memcpy code into the trampoline
> > area.  Can you please check it?  If it's OK I'll commit the
> > attached
> > patch to trunk.
> 
> Is there anything in particular you'd like me to check? It builds
> fine
> for fdpic target, successfully compiles musl libc.so, and busybox
> runs
> with the resulting libc.so. I did a quick visual inspection of the
> diff between my version and yours too and didn't see anything that
> looked suspicious to me.

Thanks.  I have committed it as r229438 after a sanity check with "make
all" on sh-elf.

The way libcalls are now emitted is a bit unhandy.  If more special-ABI
libcalls are to be added in the future, they all have to do the jsr vs.
bsrf handling (some potential candidates for new libcalls are optimized
soft FP routines).  Then we still have PR 65374 and PR 54019. In the
future maybe we should come up with something that allows emitting
libcalls in a more transparent way...

Cheers,
Oleg


Re: [OpenACC 7/11] execution model

2015-10-27 Thread Nathan Sidwell

On 10/27/15 01:18, Jakub Jelinek wrote:


LGTM, though could I ask you to try to try to move the
struct oacc_collapse
expand_oacc_collapse_init
expand_oacc_collapse_vars
expand_oacc_for
additions somewhere else
(e.g. in between expand_omp_taskreg and expand_omp_for_init_counts),


ok,  I wasn't sure of the best placement.


because it seems patch just got too confused and gave up, so most of
expand_omp_for which I assume is unchanged except for

+  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
+{
+  gcc_assert (!inner_stmt);
+  expand_oacc_for (region, &fd);
+}

addition is considered to be deleted in one place and added into another
one; if patch does this, I'd be afraid svn blame or git blame would do so
too, and thus lose history for expand_omp_for.  If moving it around doesn't
help, no big deal, but if it helps, it would be appreciated.


yeah, I noticed diff got confused. (I'm  not sure the above suggestion will 
resolve it, but we can give it a go.


nathan



Re: [OpenACC 1/11] UNIQUE internal function

2015-10-27 Thread Nathan Sidwell

On 10/27/15 06:45, Richard Biener wrote:

On Tue, Oct 27, 2015 at 9:03 AM, Jakub Jelinek  wrote:



Ok for me, but please wait for Richi's ack too.


+  /* An IFN_UNIQUE call must be duplicated as part of its group,
+or not at all.  */
+  if (is_gimple_call (g) && gimple_call_internal_p (g)
+ && gimple_call_internal_unique_p (g))

&&s always to the next line


oh, did not know that.


Otherwise looks ok to me now.


Great thanks!

nathan


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-27 Thread Jakub Jelinek
On Tue, Oct 27, 2015 at 07:03:40AM -0700, Nathan Sidwell wrote:
> On 10/27/15 06:45, Richard Biener wrote:
> >On Tue, Oct 27, 2015 at 9:03 AM, Jakub Jelinek  wrote:
> 
> >>Ok for me, but please wait for Richi's ack too.
> >
> >+  /* An IFN_UNIQUE call must be duplicated as part of its group,
> >+or not at all.  */
> >+  if (is_gimple_call (g) && gimple_call_internal_p (g)
> >+ && gimple_call_internal_unique_p (g))
> >
> >&&s always to the next line
> 
> oh, did not know that.

I believe the general rule is if all the conditions are short enough
that everything fits on a single line, you can write it as
  if (a && b && c && d)
but as soon as you need to wrap, it should be one && per line, so
  if (a
  && b
  && c
  && d)
style in that case rather than
  if (a && b
  && c && d)

But, lots of code doesn't do it this way.

Jakub


Re: [PATCH, VECTOR ABI] Add __attribute__((__simd__)) to GCC.

2015-10-27 Thread Kirill Yukhin
Hello Joseph,
On 23 Oct 14:16, Joseph Myers wrote:
> On Fri, 23 Oct 2015, Kirill Yukhin wrote:
> 
> > > You need to update this patch to take account of Marek's fix for bug 
> > > 67964 
> > > (it was because I was suspicious of the "continue;" in this patch 
> > > accepting invalid syntax that I found that bug), retest and resubmit.
> > I've rebased the patch on top of current trunk.
> 
> This isn't taking proper account of Marek's fix.
> 
> > @@ -3993,6 +4001,12 @@ c_parser_attributes (c_parser *parser)
> > break;
> >   continue;
> > }
> > + if (is_attribute_p ("simd", attr_name))
> > +   {
> > + parser->simd_attr_present = 1;
> > + c_parser_consume_token (parser);
> > + continue;
> > +   }
> 
> Any such continue needs first to break if the next token isn't a comma; 
> otherwise you accept bad syntax with no comma between successive 
> attributes.
I've understood your point.
Fixed both C/C++. I've additionally fixed Cilk case in C++ by analogy w/
Marek's fix.

Boostrapped. Regtesting is in progress. Is it ok for trunk if pass?

gcc/
* cp/parser.h (cp_parser): Add simd_attr_present.
* cp/parser.c (cp_parser_late_return_type_opt): Handle 
simd_attr_present,
require comman in __vector__ attribute.
(cp_parser_gnu_attribute_list): Ditto.
* c/c-parser.c (c_parser): Add simd_attr_present flag.
(c_parser_declaration_or_fndef): Call c_parser_declaration_or_fndef
if simd_attr_present is set.
(c_finish_omp_declare_simd): Handle simd_attr_present.
* doc/extend.texi (simd): Document new attribute.
* omp-low.c (pass_omp_simd_clone::gate): If target allows - call
without additional conditions.
gcc/testsuite/
* c-c++-common/attr-simd.c: New test.
* c-c++-common/attr-simd-2.c: Ditto.
* c-c++-common/attr-simd-3.c: Ditto.

--
Thanks, K

index c8c6a2d..b026f72 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -224,6 +224,9 @@ struct GTY(()) c_parser {
   /* Buffer to hold all the tokens from parsing the vector attribute for the
  SIMD-enabled functions (formerly known as elemental functions).  */
   vec  *cilk_simd_fn_tokens;
+
+  /* Designates if "simd" attribute is specified in decl.  */
+  BOOL_BITFIELD simd_attr_present : 1;
 };
 
 
@@ -1701,7 +1704,8 @@ c_parser_declaration_or_fndef (c_parser *parser, bool 
fndef_ok,
   if (declarator == NULL)
{
  if (omp_declare_simd_clauses.exists ()
- || !vec_safe_is_empty (parser->cilk_simd_fn_tokens))
+ || !vec_safe_is_empty (parser->cilk_simd_fn_tokens)
+ || parser->simd_attr_present)
c_finish_omp_declare_simd (parser, NULL_TREE, NULL_TREE,
   omp_declare_simd_clauses);
  c_parser_skip_to_end_of_block_or_statement (parser);
@@ -1797,7 +1801,8 @@ c_parser_declaration_or_fndef (c_parser *parser, bool 
fndef_ok,
  if (!d)
d = error_mark_node;
  if (omp_declare_simd_clauses.exists ()
- || !vec_safe_is_empty (parser->cilk_simd_fn_tokens))
+ || !vec_safe_is_empty (parser->cilk_simd_fn_tokens)
+ || parser->simd_attr_present)
c_finish_omp_declare_simd (parser, d, NULL_TREE,
   omp_declare_simd_clauses);
}
@@ -1810,7 +1815,8 @@ c_parser_declaration_or_fndef (c_parser *parser, bool 
fndef_ok,
  if (!d)
d = error_mark_node;
  if (omp_declare_simd_clauses.exists ()
- || !vec_safe_is_empty (parser->cilk_simd_fn_tokens))
+ || !vec_safe_is_empty (parser->cilk_simd_fn_tokens)
+ || parser->simd_attr_present)
c_finish_omp_declare_simd (parser, d, NULL_TREE,
   omp_declare_simd_clauses);
  start_init (d, asm_name, global_bindings_p ());
@@ -1839,7 +1845,8 @@ c_parser_declaration_or_fndef (c_parser *parser, bool 
fndef_ok,
   chainon (postfix_attrs,
all_prefix_attrs));
  if (omp_declare_simd_clauses.exists ()
- || !vec_safe_is_empty (parser->cilk_simd_fn_tokens))
+ || !vec_safe_is_empty (parser->cilk_simd_fn_tokens)
+ || parser->simd_attr_present)
{
  tree parms = NULL_TREE;
  if (d && TREE_CODE (d) == FUNCTION_DECL)
@@ -1968,7 +1975,8 @@ c_parser_declaration_or_fndef (c_parser *parser, bool 
fndef_ok,
   true, false, NULL, vNULL);
   store_parm_decls ();
   if (omp_declare_simd_clauses.exists ()
- || !vec_safe_is_empty (parser->cilk_simd_fn_tokens))
+ || !vec_safe_is_empty (parser->cilk_sim

Re: Partial fix for LTO bootstrap with Ada

2015-10-27 Thread Tom de Vries

[ cc gcc-patches ]

On 23/10/15 18:40, Eric Botcazou wrote:

--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -5887,7 +5887,10 @@ intra_create_variable_infos (struct function *fn)

 if (POINTER_TYPE_P (TREE_TYPE (t))
&& TYPE_RESTRICT (TREE_TYPE (t)))
-   make_constraint_from_global_restrict (p, "PARM_RESTRICT");
+   {
+ gcc_unreachable ();
+ make_constraint_from_global_restrict (p, "PARM_RESTRICT");
+   }
 else
  {
for (; p; p = vi_next (p))
...
to detect the 'type_contains_placeholder_p' case, but the assert did not
trigger.

Is it possible to add an ada testcase that triggers the
'type_contains_placeholder_p' case?


Well, LTO testcases are painful to create and it's 3 years later...

In any case, since:

2015-10-02  Eric Botcazou  

* gcc-interface/ada-tree.h (DECL_RESTRICTED_ALIASING_P): New flag.
* gcc-interface/decl.c (gnat_to_gnu_param): For parameters passed by
reference but whose type isn't by-ref and whose mechanism hasn't been
forced to by-ref, set the DECL_RESTRICTED_ALIASING_P flag directly on
them instead of changing their type.
* gcc-interface/trans.c (scan_rhs_r): New helper function.
(independent_iterations_p): New predicate.
(Loop_Statement_to_gnu): For a loop with an iteration scheme, set an
ivdep pragma if the iterations are independent.

the Ada compiler doesn't use restrict-qualified pointer types anymore so you
can back out my change from 2012 if need be.


Sofar I didn't need to revert this change.

But, are you saying here it's dead code?

In that case, it might make sense to revert it anyway.

Thanks,
- Tom


Re: [OpenACC 1/11] UNIQUE internal function

2015-10-27 Thread Nathan Sidwell

On 10/27/15 01:03, Jakub Jelinek wrote:

On Mon, Oct 26, 2015 at 03:32:45PM -0700, Nathan Sidwell wrote:



to break out the else part into a separate function.   That's fine -- it'll
copy the whole CFG of interest.


The question is if some UNIQUE call could be ever considered as part of the
cheap test or do cheap thing.  If not, everything is fine of course for
fnsplit.


It doesn't matter (although I doubt the CFG it's attached to will be considered 
cheap) for how I'm using it.  We never generate a CFG where part of the UNIQUE 
sequence will be in the cheap thing block and another part not in the cheap 
thing  block.


nathan


Re: [PATCH, VECTOR ABI] Add __attribute__((__simd__)) to GCC.

2015-10-27 Thread Jakub Jelinek
On Tue, Oct 27, 2015 at 05:06:58PM +0300, Kirill Yukhin wrote:
> Boostrapped. Regtesting is in progress. Is it ok for trunk if pass?
> 
> gcc/
> * cp/parser.h (cp_parser): Add simd_attr_present.
> * cp/parser.c (cp_parser_late_return_type_opt): Handle 
> simd_attr_present,
>   require comman in __vector__ attribute.
> (cp_parser_gnu_attribute_list): Ditto.
> * c/c-parser.c (c_parser): Add simd_attr_present flag.
> (c_parser_declaration_or_fndef): Call c_parser_declaration_or_fndef
> if simd_attr_present is set.

gcc/cp/ and gcc/c/ have their own ChangeLog files, therefore cp/ or c/
prefixes should also never appear in the ChangeLog entries.

> (c_finish_omp_declare_simd): Handle simd_attr_present.
> * doc/extend.texi (simd): Document new attribute.
> * omp-low.c (pass_omp_simd_clone::gate): If target allows - call
> without additional conditions.
> gcc/testsuite/
> * c-c++-common/attr-simd.c: New test.
> * c-c++-common/attr-simd-2.c: Ditto.
> * c-c++-common/attr-simd-3.c: Ditto.
> -  error ("%<#pragma omp declare simd%> cannot be used in the same "
> +  error ("%<#pragma omp declare simd%> or __simd__ attribute cannot be 
> used in the same "

I'd write % instead of __simd__.  __simd__ is just one of the
possible spellings of the attribute...

>"function marked as a Cilk Plus SIMD-enabled function");
>vec_free (parser->cilk_simd_fn_tokens);
>return;
> @@ -15423,7 +15441,7 @@ c_finish_omp_declare_simd (c_parser *parser, tree 
> fndecl, tree parms,
>unsigned int tokens_avail = parser->tokens_avail;
>gcc_assert (parser->tokens == &parser->tokens_buf[0]);
>bool is_cilkplus_cilk_simd_fn = false;
> -  
> + 
>if (flag_cilkplus && !vec_safe_is_empty (parser->cilk_simd_fn_tokens))

If you are changing this, please remove all trailing whitespace from the
empty line.

> +  if (parser->simd_attr_present
> +  && is_cilkplus_cilk_simd_fn)

This could fit on one line
  if (parser->simd_attr_present && is_cilkplus_cilk_simd_fn)
just fine.

> +  error ("SIMD-enabled function attributes"
> +  "are allowed when attribute __simd__ is specified");

See earlier.
>  
> +  /* Attach `omp declare simd’ attribute if __simd__ is specified AND no 
> OpenMP clauses
> + present in decl.  */
> +  if (parser->simd_attr_present
> +  && parser->tokens_avail == 0)

See earlier.

> @@ -19363,6 +19365,18 @@ cp_parser_late_return_type_opt (cp_parser* parser, 
> cp_declarator *declarator,
>= cp_parser_late_parsing_omp_declare_simd (parser,
>declarator->std_attributes);
>  
> +  if (parser->simd_attr_present
> +  && !declare_simd_p)

Ditto.
> +{
> +  if (cilk_simd_fn_vector_p)
> + error ("__simd__ attribute cannot be used in the same function"
> +" marked as a Cilk Plus SIMD-enabled function");

Ditto.

> diff --git a/gcc/omp-low.c b/gcc/omp-low.c
> index ad7c017..232dc5c 100644
> --- a/gcc/omp-low.c
> +++ b/gcc/omp-low.c
> @@ -17412,10 +17412,7 @@ public:
>  bool
>  pass_omp_simd_clone::gate (function *)
>  {
> -  return ((flag_openmp || flag_openmp_simd
> -|| flag_cilkplus
> -|| (in_lto_p && !flag_wpa))
> -   && (targetm.simd_clone.compute_vecsize_and_simdlen != NULL));
> +  return targetm.simd_clone.compute_vecsize_and_simdlen != NULL;
>  }
>  
>  } // anon namespace

I wonder what the compile time effect this will have.
have (alternative is of course 

> diff --git a/gcc/testsuite/c-c++-common/attr-simd-2.c 
> b/gcc/testsuite/c-c++-common/attr-simd-2.c
> new file mode 100644
> index 000..e9afc11
> --- /dev/null
> +++ b/gcc/testsuite/c-c++-common/attr-simd-2.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-fdump-tree-optimized -fopenmp-simd" } */
> +
> +#pragma omp declare simd
> +__attribute__((__simd__))
> +static int simd_attr (void)
> +{
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump "omp declare simd" "optimized" } } */

You should also test other spellings of the attribute...

Jakub


[PATCH] Fix PR66313

2015-10-27 Thread Richard Biener

When factoring a*b + a*c to (b + c)*a we have to guard against the
case of a == 0 as after the factoring b + c might overflow in that
case.  Fixed by doing the addition in an unsigned type if required.

Bootstrap / regtest pending on x86_64-unknown-linux-gnu.

Richard.

2015-10-27  Richard Biener  

PR middle-end/66313
* fold-const.c (fold_plusminus_mult_expr): If the factored
factor may be zero use a wrapping type for the inner operation.

* c-c++-common/ubsan/pr66313.c: New testcase.

Index: gcc/fold-const.c
===
*** gcc/fold-const.c(revision 229404)
--- gcc/fold-const.c(working copy)
*** fold_plusminus_mult_expr (location_t loc
*** 6976,6989 
}
  }
  
!   if (same)
  return fold_build2_loc (loc, MULT_EXPR, type,
fold_build2_loc (loc, code, type,
 fold_convert_loc (loc, type, alt0),
 fold_convert_loc (loc, type, alt1)),
fold_convert_loc (loc, type, same));
  
!   return NULL_TREE;
  }
  
  /* Subroutine of native_encode_expr.  Encode the INTEGER_CST
--- 6989,7016 
}
  }
  
!   if (!same)
! return NULL_TREE;
! 
!   if (! INTEGRAL_TYPE_P (type)
!   || TYPE_OVERFLOW_WRAPS (type)
!   || TREE_CODE (same) == INTEGER_CST)
  return fold_build2_loc (loc, MULT_EXPR, type,
fold_build2_loc (loc, code, type,
 fold_convert_loc (loc, type, alt0),
 fold_convert_loc (loc, type, alt1)),
fold_convert_loc (loc, type, same));
  
!   /* Same may be zero and thus the operation 'code' may overflow.  Perform
!  the addition in an unsigned type.  */
!   tree utype = unsigned_type_for ( type);
!   return fold_build2_loc (loc, MULT_EXPR, type,
! fold_convert_loc
!   (loc, type,
!fold_build2_loc (loc, code, utype,
! fold_convert_loc (loc, utype, alt0),
! fold_convert_loc (loc, utype, alt1))),
! fold_convert_loc (loc, type, same));
  }
  
  /* Subroutine of native_encode_expr.  Encode the INTEGER_CST
Index: gcc/testsuite/c-c++-common/ubsan/pr66313.c
===
*** gcc/testsuite/c-c++-common/ubsan/pr66313.c  (revision 0)
--- gcc/testsuite/c-c++-common/ubsan/pr66313.c  (working copy)
***
*** 0 
--- 1,12 
+ /* { dg-do run } */
+ /* { dg-options "-fsanitize=undefined -fsanitize-undefined-trap-on-error" } */
+ 
+ int __attribute__((noinline,noclone))
+ f(int a, int b, int c)
+ {
+   return a * b + a * c;
+ }
+ int main()
+ {
+   return f(0, __INT_MAX__, __INT_MAX__);
+ }


Re: [PATCH] Fix PR68067

2015-10-27 Thread Richard Biener
On Tue, 27 Oct 2015, Richard Biener wrote:

> 
> The following patch adjusts negate_expr_p to account for the fact
> that we can't generally change a - (b - c) to (c - b) + a because
> -INF - 0 is ok while 0 - -INF not.  Similarly for a - (b + c).
> While creating testcases I noticed that MULT_EXPR handling is bogus
> as well as with -INF/2 * 2 neither operand can be negated safely.
> 
> I believe the division case is also still wrong but I refrained
> from touching it with this patch.

Here is the division part.

Bootstrap / regtest running on x86_64-unknown-linux-gnu.  A
related testcase went in with r202204.

Richard.

2015-10-27  Richard Biener  

* fold-const.c (negate_expr_p): Adjust the division case to
properly avoid introducing undefined overflow.
(fold_negate_expr): Likewise.

Index: gcc/fold-const.c
===
--- gcc/fold-const.c(revision 229404)
+++ gcc/fold-const.c(working copy)
@@ -475,29 +488,23 @@ negate_expr_p (tree t)
 case TRUNC_DIV_EXPR:
 case ROUND_DIV_EXPR:
 case EXACT_DIV_EXPR:
-  /* In general we can't negate A / B, because if A is INT_MIN and
-B is 1, we may turn this into INT_MIN / -1 which is undefined
-and actually traps on some architectures.  But if overflow is
-undefined, we can negate, because - (INT_MIN / 1) is an
-overflow.  */
   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
{
- if (!TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (t)))
-   break;
- /* If overflow is undefined then we have to be careful because
-we ask whether it's ok to associate the negate with the
-division which is not ok for example for
--((a - b) / c) where (-(a - b)) / c may invoke undefined
-overflow because of negating INT_MIN.  So do not use
-negate_expr_p here but open-code the two important cases.  */
- if (TREE_CODE (TREE_OPERAND (t, 0)) == NEGATE_EXPR
- || (TREE_CODE (TREE_OPERAND (t, 0)) == INTEGER_CST
- && may_negate_without_overflow_p (TREE_OPERAND (t, 0
+ if (negate_expr_p (TREE_OPERAND (t, 0)))
return true;
+
+ /* In general we can't negate B in A / B, because if A is INT_MIN and
+B is 1, we may turn this into INT_MIN / -1 which is undefined
+and actually traps on some architectures.  */
+ if (TYPE_OVERFLOW_WRAPS (TREE_TYPE (t))
+ || (TREE_CODE (TREE_OPERAND (t, 1)) == INTEGER_CST
+ && ! integer_onep (TREE_OPERAND (t, 1
+   return negate_expr_p (TREE_OPERAND (t, 1));
}
-  else if (negate_expr_p (TREE_OPERAND (t, 0)))
-   return true;
-  return negate_expr_p (TREE_OPERAND (t, 1));
+  else
+   return (negate_expr_p (TREE_OPERAND (t, 0))
+   || negate_expr_p (TREE_OPERAND (t, 1)));
+  break;
 
 case NOP_EXPR:
   /* Negate -((double)float) as (double)(-float).  */
@@ -667,40 +681,22 @@ fold_negate_expr (location_t loc, tree t
 case TRUNC_DIV_EXPR:
 case ROUND_DIV_EXPR:
 case EXACT_DIV_EXPR:
-  /* In general we can't negate A / B, because if A is INT_MIN and
+  /* In general we can't negate B in A / B, because if A is INT_MIN and
 B is 1, we may turn this into INT_MIN / -1 which is undefined
-and actually traps on some architectures.  But if overflow is
-undefined, we can negate, because - (INT_MIN / 1) is an
-overflow.  */
-  if (!INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_UNDEFINED (type))
-{
- const char * const warnmsg = G_("assuming signed overflow does not "
- "occur when negating a division");
-  tem = TREE_OPERAND (t, 1);
-  if (negate_expr_p (tem))
-   {
- if (INTEGRAL_TYPE_P (type)
- && (TREE_CODE (tem) != INTEGER_CST
- || integer_onep (tem)))
-   fold_overflow_warning (warnmsg, WARN_STRICT_OVERFLOW_MISC);
- return fold_build2_loc (loc, TREE_CODE (t), type,
- TREE_OPERAND (t, 0), negate_expr (tem));
-   }
- /* If overflow is undefined then we have to be careful because
-we ask whether it's ok to associate the negate with the
-division which is not ok for example for
--((a - b) / c) where (-(a - b)) / c may invoke undefined
-overflow because of negating INT_MIN.  So do not use
-negate_expr_p here but open-code the two important cases.  */
-  tem = TREE_OPERAND (t, 0);
- if ((INTEGRAL_TYPE_P (type)
-  && (TREE_CODE (tem) == NEGATE_EXPR
-  || (TREE_CODE (tem) == INTEGER_CST
-  && may_negate_without_overflow_p (tem
- || !INTEGRAL_TYPE_P (type))
-   return fold_build2_loc (loc, TREE_CODE (t), type

[PATCH] Fix PR65962 fallout

2015-10-27 Thread Richard Biener

This fixes another fallout of the PR65962 fix, gcc.dg/vect/vect-62.c
failing for targets that use constant pool entries for the array
initializer.

On x86_64 we fail to vectorize the 2nd loop in the testcase because
PRE makes the latch block non-empty (and adds a loop carried dependency).

The patch fixes the code that is supposed to inhibit PRE from doing this.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2015-10-27  Richard Biener  

PR tree-optimization/65962
* tree-ssa-pre.c (eliminate_dom_walker::before_dom_children):
Avoid creating loop carried dependences also for outer loops
of the loop a use to replace is in.

* gcc.dg/vect/vect-62.c: Adjust.

Index: gcc/tree-ssa-pre.c
===
*** gcc/tree-ssa-pre.c  (revision 229404)
--- gcc/tree-ssa-pre.c  (working copy)
*** eliminate_dom_walker::before_dom_childre
*** 4082,4089 
  gimple *def_stmt = SSA_NAME_DEF_STMT (sprime);
  basic_block def_bb = gimple_bb (def_stmt);
  if (gimple_code (def_stmt) == GIMPLE_PHI
! && b->loop_father->header == def_bb)
{
  ssa_op_iter iter;
  tree op;
  bool found = false;
--- 4090,4098 
  gimple *def_stmt = SSA_NAME_DEF_STMT (sprime);
  basic_block def_bb = gimple_bb (def_stmt);
  if (gimple_code (def_stmt) == GIMPLE_PHI
! && def_bb->loop_father->header == def_bb)
{
+ loop_p loop = def_bb->loop_father;
  ssa_op_iter iter;
  tree op;
  bool found = false;
*** eliminate_dom_walker::before_dom_childre
*** 4092,4100 
  affine_iv iv;
  def_bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (def_bb
! && flow_bb_inside_loop_p (b->loop_father, def_bb)
! && simple_iv (b->loop_father,
!   b->loop_father, op, &iv, true))
{
  found = true;
  break;
--- 4101,4108 
  affine_iv iv;
  def_bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (def_bb
! && flow_bb_inside_loop_p (loop, def_bb)
! && simple_iv (loop, loop, op, &iv, true))
{
  found = true;
  break;
*** eliminate_dom_walker::before_dom_childre
*** 4110,4116 
  print_generic_expr (dump_file, sprime, 0);
  fprintf (dump_file, " which would add a loop"
   " carried dependence to loop %d\n",
!  b->loop_father->num);
}
  /* Don't keep sprime available.  */
  sprime = NULL_TREE;
--- 4118,4124 
  print_generic_expr (dump_file, sprime, 0);
  fprintf (dump_file, " which would add a loop"
   " carried dependence to loop %d\n",
!  loop->num);
}
  /* Don't keep sprime available.  */
  sprime = NULL_TREE;
Index: gcc/testsuite/gcc.dg/vect/vect-62.c
===
*** gcc/testsuite/gcc.dg/vect/vect-62.c (revision 229404)
--- gcc/testsuite/gcc.dg/vect/vect-62.c (working copy)
*** int main1 ()
*** 33,41 
  }
  
/* Multidimensional array. Aligned. The "inner" dimensions
!  are invariant in the inner loop. Vectorizable, but the
!  vectorizer detects that everything is invariant and that
!  the loop is better left untouched. (it should be optimized away). */
for (i = 0; i < N; i++)
  {
for (j = 0; j < N; j++)
--- 33,40 
  }
  
/* Multidimensional array. Aligned. The "inner" dimensions
!  are invariant in the inner loop.  The outer loop is
!  vectorizable after invariant/store motion.  */
for (i = 0; i < N; i++)
  {
for (j = 0; j < N; j++)
*** int main (void)
*** 65,69 
return main1 ();
  }
  
! /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
  /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 
"vect" } } */
--- 64,68 
return main1 ();
  }
  
! /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
  /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 0 
"vect" } } */


Re: PING: [PATCH] PR target/67215: -fno-plt needs improvements for x86

2015-10-27 Thread Ramana Radhakrishnan

> 
> OK, then it's fairly x86-64 specific optimization, because we can't do "call 
> *mem" in
> aarch64 and some other targets.

It is a fairly x86_64 specific optimization and doesn't apply to AArch64.

The question really is what impact does removing the generic code handling have 
on aarch64 - is it a no-op or not for the existing -fno-plt implementation in 
the AArch64 backend ? The only case that is of interest is the bit below in 
calls.c and it looks like that may well be redundant with the logic in the 
backend already, but I have not done the full analysis to convince myself that 
the code in the backend is sufficient.

-  && (!flag_plt
-  || lookup_attribute ("noplt", DECL_ATTRIBUTES (fndecl_or_type)))
-  && !targetm.binds_local_p (fndecl_or_type))


regards
Ramana


[PATCH][RTL-ifcvt] PR rtl-optimization/67749: Do not emit separate SET insn in IF-ELSE case

2015-10-27 Thread Kyrill Tkachov

Hi all,

This patch fixes the gcc.dg/ifcvt-2.c test for x86_64 where we were failing to 
if-convert.
This was because in my patch at 
https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=228194
which tried to emit a SET to move the source of insn_a or insn_b (that came 
from the test block)
into a temporary. A SET however, is not always enough. For example, on x86_64 
in order for the
resulting insn to be recognised it frequently needs to be in PARALLEL with a 
(clobber (reg:CC FLAGS_REG)).
This leads to the insn not being recognised.

So this patch removes that SET and instead generates a couple of temporary 
pseudos that gets passed
on a bit later to the code that loads the operands into registers when they're 
not general_operand.
This way we just modify the existing (recognisable) sets, allowing us to 
if-convert the testcase.

Bootstrapped and tested on x86_64, arm, aarch64.

Ok for trunk?

Thanks,
Kyrill

2015-10-27  Kyrylo Tkachov  

PR rtl-optimization/67749
* ifcvt.c (noce_try_cmove_arith): Do not emit move in IF-ELSE
case before emitting the two blocks.  Instead modify the register
in the corresponding final insn of the basic block.
commit d58740af39ceaf2d654cf3feff97b39fb6da3e82
Author: Kyrylo Tkachov 
Date:   Tue Sep 29 13:44:21 2015 +0100

[RTL-ifcvt] PR rtl-optimization/67749

diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 8846e69..7c6e7ca 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -2135,38 +2135,29 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
  emit might clobber the register used by B or A, so move it to a pseudo
  first.  */
 
+  rtx tmp_a = NULL_RTX;
+  rtx tmp_b = NULL_RTX;
+
   if (b_simple || !else_bb)
-{
-  rtx tmp_b = gen_reg_rtx (x_mode);
-  /* Perform the simplest kind of set.  The register allocator
-	 should remove it if it's not actually needed.  If this set is not
-	 a valid insn (can happen on the is_mem path) then end_ifcvt_sequence
-	 will cancel the whole sequence.  Don't try any of the fallback paths
-	 from noce_emit_move_insn since we want this to be the simplest kind
-	 of move.  */
-  emit_insn (gen_rtx_SET (tmp_b, b));
-  b = tmp_b;
-}
+tmp_b = gen_reg_rtx (x_mode);
 
   if (a_simple || !then_bb)
-{
-  rtx tmp_a = gen_reg_rtx (x_mode);
-  emit_insn (gen_rtx_SET (tmp_a, a));
-  a = tmp_a;
-}
+tmp_a = gen_reg_rtx (x_mode);
 
   orig_a = a;
   orig_b = b;
 
   rtx emit_a = NULL_RTX;
   rtx emit_b = NULL_RTX;
-
+  rtx_insn *tmp_insn = NULL;
+  bool modified_in_a = false;
+  bool  modified_in_b = false;
   /* If either operand is complex, load it into a register first.
  The best way to do this is to copy the original insn.  In this
  way we preserve any clobbers etc that the insn may have had.
  This is of course not possible in the IS_MEM case.  */
 
-  if (! general_operand (a, GET_MODE (a)))
+  if (! general_operand (a, GET_MODE (a)) || tmp_a)
 {
 
   if (is_mem)
@@ -2174,36 +2165,51 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
 	  rtx reg = gen_reg_rtx (GET_MODE (a));
 	  emit_a = gen_rtx_SET (reg, a);
 	}
-  else if (! insn_a)
-	goto end_seq_and_fail;
   else
 	{
-	  a = gen_reg_rtx (GET_MODE (a));
-	  rtx_insn *copy_of_a = as_a  (copy_rtx (insn_a));
-	  rtx set = single_set (copy_of_a);
-	  SET_DEST (set) = a;
+	  if (insn_a)
+	{
+	  a = tmp_a ? tmp_a : gen_reg_rtx (GET_MODE (a));
+
+	  rtx_insn *copy_of_a = as_a  (copy_rtx (insn_a));
+	  rtx set = single_set (copy_of_a);
+	  SET_DEST (set) = a;
 
-	  emit_a = PATTERN (copy_of_a);
+	  emit_a = PATTERN (copy_of_a);
+	}
+	  else
+	{
+	  rtx tmp_reg = tmp_a ? tmp_a : gen_reg_rtx (GET_MODE (a));
+	  emit_a = gen_rtx_SET (tmp_reg, a);
+	  a = tmp_reg;
+	}
 	}
 }
 
-  if (! general_operand (b, GET_MODE (b)))
+  if (! general_operand (b, GET_MODE (b)) || tmp_b)
 {
   if (is_mem)
 	{
   rtx reg = gen_reg_rtx (GET_MODE (b));
 	  emit_b = gen_rtx_SET (reg, b);
 	}
-  else if (! insn_b)
-	goto end_seq_and_fail;
   else
 	{
-  b = gen_reg_rtx (GET_MODE (b));
-	  rtx_insn *copy_of_b = as_a  (copy_rtx (insn_b));
-	  rtx set = single_set (copy_of_b);
+	  if (insn_b)
+	{
+	  b = tmp_b ? tmp_b : gen_reg_rtx (GET_MODE (b));
+	  rtx_insn *copy_of_b = as_a  (copy_rtx (insn_b));
+	  rtx set = single_set (copy_of_b);
 
-	  SET_DEST (set) = b;
-	  emit_b = PATTERN (copy_of_b);
+	  SET_DEST (set) = b;
+	  emit_b = PATTERN (copy_of_b);
+	}
+	  else
+	{
+	  rtx tmp_reg = tmp_b ? tmp_b : gen_reg_rtx (GET_MODE (b));
+	  emit_b = gen_rtx_SET (tmp_reg, b);
+	  b = tmp_reg;
+	  }
 	}
 }
 
@@ -2211,16 +2217,35 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
swap insn that sets up A with the one that sets up B.  If even
that doesn't help, punt.  */
 
-if (emit_a && modified_in_p (orig_b, emit_a))
-  {
-	if (modified_in_p (orig_a, emit_b))
-	  goto end_seq_and

Re: PING: [PATCH] PR target/67215: -fno-plt needs improvements for x86

2015-10-27 Thread H.J. Lu
On Tue, Oct 27, 2015 at 7:34 AM, Ramana Radhakrishnan
 wrote:
>
>>
>> OK, then it's fairly x86-64 specific optimization, because we can't do "call 
>> *mem" in
>> aarch64 and some other targets.
>
> It is a fairly x86_64 specific optimization and doesn't apply to AArch64.
>
> The question really is what impact does removing the generic code handling 
> have on aarch64 - is it a no-op or not for the existing -fno-plt 
> implementation in the AArch64 backend ? The only case that is of interest is 
> the bit below in calls.c and it looks like that may well be redundant with 
> the logic in the backend already, but I have not done the full analysis to 
> convince myself that the code in the backend is sufficient.
>
> -  && (!flag_plt
> -  || lookup_attribute ("noplt", DECL_ATTRIBUTES 
> (fndecl_or_type)))
> -  && !targetm.binds_local_p (fndecl_or_type))
>

-fno-plt is a backend specific optimization and should be handled
in backend.

-- 
H.J.


Re: [PATCH] Pass manager: add support for termination of pass list

2015-10-27 Thread Richard Biener
On Tue, Oct 27, 2015 at 1:36 PM, Martin Liška  wrote:
> On 10/26/2015 02:48 PM, Richard Biener wrote:
>> On Thu, Oct 22, 2015 at 1:02 PM, Martin Liška  wrote:
>>> On 10/21/2015 04:06 PM, Richard Biener wrote:
 On Wed, Oct 21, 2015 at 1:24 PM, Martin Liška  wrote:
> On 10/21/2015 11:59 AM, Richard Biener wrote:
>> On Wed, Oct 21, 2015 at 11:19 AM, Martin Liška  wrote:
>>> On 10/20/2015 03:39 PM, Richard Biener wrote:
 On Tue, Oct 20, 2015 at 3:00 PM, Martin Liška  wrote:
> Hello.
>
> As part of upcoming merge of HSA branch, we would like to have 
> possibility to terminate
> pass manager after execution of the HSA generation pass. The HSA 
> back-end is implemented
> as a tree pass that directly emits HSAIL from gimple tree 
> representation. The pass operates
> on clones created by HSA IPA pass and the pass manager should stop 
> execution of further
> RTL passes.
>
> Suggested patch survives bootstrap and regression tests on 
> x86_64-linux-pc.
>
> What do you think about it?

 Are you sure it works this way?

 Btw, you will miss executing of all the cleanup passes that will
 eventually free memory
 associated with the function.  So I'd rather support a
 TODO_discard_function which
 should basically release the body from the cgraph.
>>>
>>> Hi.
>>>
>>> Agree with you that I should execute all TODOs, which can be easily 
>>> done.
>>> However, if I just try to introduce the suggested TODO and handle it 
>>> properly
>>> by calling cgraph_node::release_body, then for instance fn->gimple_df, 
>>> fn->cfg are
>>> released and I hit ICEs on many places.
>>>
>>> Stopping the pass manager looks necessary, or do I miss something?
>>
>> "Stopping the pass manager" is necessary after TODO_discard_function, 
>> yes.
>> But that may be simply done via a has_body () check then?
>
> Thanks, there's second version of the patch. I'm going to start 
> regression tests.

 As release_body () will free cfun you should pop_cfun () before
 calling it (and thus
>>>
>>> Well, release_function_body calls both push & pop, so I think calling pop
>>> before cgraph_node::release_body is not necessary.
>>
>> (ugh).
>>
>>> If tried to call pop_cfun before cgraph_node::release_body, I have cfun 
>>> still
>>> pointing to the same (function *) (which is gcc_freed, but cfun != NULL).
>>
>> Hmm, I meant to call pop_cfun then after it (unless you want to fix the 
>> above,
>> none of the freeing functions should techincally need 'cfun', just add
>> 'fn' parameters ...).
>>
>> I expected pop_cfun to eventually set cfun to NULL if it popped the
>> "last" cfun.  Why
>> doesn't it do that?
>>
 drop its modification).  Also TODO_discard_functiuon should be only set for
 local passes thus you should probably add a gcc_assert (cfun).
 I'd move its handling earlier, definitely before the ggc_collect, 
 eventually
 before the pass_fini_dump_file () (do we want a last dump of the
 function or not?).
>>>
>>> Fully agree, moved here.
>>>

 @@ -2397,6 +2410,10 @@ execute_pass_list_1 (opt_pass *pass)
  {
gcc_assert (pass->type == GIMPLE_PASS
   || pass->type == RTL_PASS);
 +
 +
 +  if (!gimple_has_body_p (current_function_decl))
 +   return;

 too much vertical space.  With popping cfun before releasing the body the 
 check
 might just become if (!cfun) and
>>>
>>> As mentioned above, as release body is symmetric (calling push & pop), the 
>>> suggested
>>> guard will not work.
>>
>> I suggest to fix it.  If it calls push/pop it should leave with the
>> original cfun, popping again
>> should make it NULL?
>>

 @@ -2409,7 +2426,7 @@ execute_pass_list (function *fn, opt_pass *pass)
  {
push_cfun (fn);
execute_pass_list_1 (pass);
 -  if (fn->cfg)
 +  if (gimple_has_body_p (current_function_decl) && fn->cfg)
  {
free_dominance_info (CDI_DOMINATORS);
free_dominance_info (CDI_POST_DOMINATORS);

 here you'd need to guard the pop_cfun call on cfun != NULL.  IMHO it's 
 better
 to not let cfun point to deallocated data.
>>>
>>> As I've read the code, since we gcc_free 'struct function', one can just 
>>> rely on
>>> gimple_has_body_p (current_function_decl) as it correctly realizes that
>>> DECL_STRUCT_FUNCTION (current_function_decl) == NULL.
>>
>> I'm sure that with some GC checking ggc_free makes them #deadbeef or so:
>>
>> void
>> ggc_free (void *p)
>> {
>> ...
>> #ifdef ENABLE_GC_CHECKING
>>   /* Poison the data, to indicate the data is garbage.  */
>>   VALGRIND_DISCARD (VALGRIND_MAKE_MEM_UNDEFINED (p, size));
>>   memset (p, 0xa5, size);
>> #endif
>>
>> so I don't t

[PR fortran/63865] OpenACC cache directive: match Fortran support with C/C++ (was: [gomp4] OpenACC cache directive maintenance)

2015-10-27 Thread Thomas Schwinge
Hi!

On Wed, 05 Nov 2014 17:44:58 +0100, I wrote:
> On Wed, 05 Nov 2014 17:36:46 +0100, I wrote:
> > In r217146, I applied the following to gomp-4_0-branch:
> > 
> > [OpenACC cache directive maintenance in C/C++]

> I also tried to make this work for Fortran, but didn't manage to (in
> a reasonable amount of time, which has not been a lot that I allocated)
> ;-) -- would you please have a look at this (but it's not urgent).
> 
> [WIP patch]

That never got resolved, so I've now done it myself, directly for trunk.
OK to commit?

commit 1e0a6332eb8b713afaeb43b554d4aae501a78bf9
Author: Thomas Schwinge 
Date:   Tue Oct 27 12:20:12 2015 +0100

[PR fortran/63865] OpenACC cache directive: match Fortran support with C/C++

gcc/fortran/
PR fortran/63865
* openmp.c (resolve_oacc_cache): Remove function.
(gfc_match_oacc_cache): Enable array sections.
(resolve_omp_clauses, gfc_resolve_oacc_directive): Change
accordingly.
* trans-openmp.c (gfc_trans_omp_clauses): Likewise.
gcc/testsuite/
PR fortran/63865
* gfortran.dg/goacc/coarray.f95: Expect the OpenACC cache
directive to work.
* gfortran.dg/goacc/loop-1.f95: Likewise.
* gfortran.dg/goacc/cache-1.f95: Likewise, and extend testing.
* gfortran.dg/goacc/cray.f95: Likewise.
* gfortran.dg/goacc/parameter.f95: Likewise.
---
 gcc/fortran/openmp.c  | 16 
 gcc/fortran/trans-openmp.c| 22 --
 gcc/testsuite/gfortran.dg/goacc/cache-1.f95   |  9 +++--
 gcc/testsuite/gfortran.dg/goacc/coarray.f95   |  3 ++-
 gcc/testsuite/gfortran.dg/goacc/cray.f95  |  4 +---
 gcc/testsuite/gfortran.dg/goacc/loop-1.f95|  1 -
 gcc/testsuite/gfortran.dg/goacc/parameter.f95 |  3 +--
 7 files changed, 31 insertions(+), 27 deletions(-)

diff --git gcc/fortran/openmp.c gcc/fortran/openmp.c
index 3c12d8e..6c78c97 100644
--- gcc/fortran/openmp.c
+++ gcc/fortran/openmp.c
@@ -1387,7 +1387,8 @@ gfc_match_oacc_cache (void)
 {
   gfc_omp_clauses *c = gfc_get_omp_clauses ();
   match m = gfc_match_omp_variable_list (" (",
-&c->lists[OMP_LIST_CACHE], true);
+&c->lists[OMP_LIST_CACHE], true,
+NULL, NULL, true);
   if (m != MATCH_YES)
 {
   gfc_free_omp_clauses(c);
@@ -3107,6 +3108,7 @@ resolve_omp_clauses (gfc_code *code, locus *where,
  case OMP_LIST_MAP:
  case OMP_LIST_TO:
  case OMP_LIST_FROM:
+ case OMP_LIST_CACHE:
for (; n != NULL; n = n->next)
  {
if (n->expr)
@@ -3380,7 +3382,6 @@ resolve_omp_clauses (gfc_code *code, locus *where,
   n->sym->name, name, where);
  /* FALLTHRU */
  case OMP_LIST_DEVICE_RESIDENT:
- case OMP_LIST_CACHE:
check_symbol_not_pointer (n->sym, *where, name);
check_array_not_assumed (n->sym, *where, name);
break;
@@ -4597,13 +4598,6 @@ resolve_oacc_loop (gfc_code *code)
 }
 
 
-static void
-resolve_oacc_cache (gfc_code *code ATTRIBUTE_UNUSED)
-{
-  sorry ("Sorry, !$ACC cache unimplemented yet");
-}
-
-
 void
 gfc_resolve_oacc_declare (gfc_namespace *ns)
 {
@@ -4657,6 +4651,7 @@ gfc_resolve_oacc_directive (gfc_code *code, gfc_namespace 
*ns ATTRIBUTE_UNUSED)
 case EXEC_OACC_ENTER_DATA:
 case EXEC_OACC_EXIT_DATA:
 case EXEC_OACC_WAIT:
+case EXEC_OACC_CACHE:
   resolve_omp_clauses (code, &code->loc, code->ext.omp_clauses, NULL,
   true);
   break;
@@ -4665,9 +4660,6 @@ gfc_resolve_oacc_directive (gfc_code *code, gfc_namespace 
*ns ATTRIBUTE_UNUSED)
 case EXEC_OACC_LOOP:
   resolve_oacc_loop (code);
   break;
-case EXEC_OACC_CACHE:
-  resolve_oacc_cache (code);
-  break;
 default:
   break;
 }
diff --git gcc/fortran/trans-openmp.c gcc/fortran/trans-openmp.c
index def8afb..3be9f51 100644
--- gcc/fortran/trans-openmp.c
+++ gcc/fortran/trans-openmp.c
@@ -1778,9 +1778,6 @@ gfc_trans_omp_clauses (stmtblock_t *block, 
gfc_omp_clauses *clauses,
case OMP_LIST_DEVICE_RESIDENT:
  clause_code = OMP_CLAUSE_DEVICE_RESIDENT;
  goto add_clause;
-   case OMP_LIST_CACHE:
- clause_code = OMP_CLAUSE__CACHE_;
- goto add_clause;
 
add_clause:
  omp_clauses
@@ -2159,14 +2156,27 @@ gfc_trans_omp_clauses (stmtblock_t *block, 
gfc_omp_clauses *clauses,
  break;
case OMP_LIST_TO:
case OMP_LIST_FROM:
+   case OMP_LIST_CACHE:
  for (; n != NULL; n = n->next)
{
  if (!n->sym->attr.referenced)
continue;
 
- tree node = build_omp_clause (input_location,
-   list == OMP_LIST_TO
- 

Re: PING: [PATCH] PR target/67215: -fno-plt needs improvements for x86

2015-10-27 Thread Jiong Wang



On 27/10/15 14:50, H.J. Lu wrote:

On Tue, Oct 27, 2015 at 7:34 AM, Ramana Radhakrishnan
 wrote:

OK, then it's fairly x86-64 specific optimization, because we can't do "call 
*mem" in
aarch64 and some other targets.

It is a fairly x86_64 specific optimization and doesn't apply to AArch64.

The question really is what impact does removing the generic code handling have 
on aarch64 - is it a no-op or not for the existing -fno-plt implementation in 
the AArch64 backend ? The only case that is of interest is the bit below in 
calls.c and it looks like that may well be redundant with the logic in the 
backend already, but I have not done the full analysis to convince myself that 
the code in the backend is sufficient.

-  && (!flag_plt
-  || lookup_attribute ("noplt", DECL_ATTRIBUTES (fndecl_or_type)))
-  && !targetm.binds_local_p (fndecl_or_type))


-fno-plt is a backend specific optimization and should be handled
in backend.



The removing of those generic code has broken aarch64.

Actually those code in calls.c shouldn't prevent such "call *mem" 
opportunity on x86-64 because the combine pass
should combine "load reg, symbol + call reg" back into "call *mem" on 
x86-64 as there is related define_insn.


the testcases in PR67215 and included in your patch, all of which are 
loops, failed because either RTL PRE or loop pass will
hoist address calculation pattern as invariant out of loop into another 
basic block different with the call_insn. while combine
pass only work within basic block scope, thus we have missed such 
combine opportunity on x86-64.


I am not sure anyone has done experiment before on extend combine pass 
to larger scope.





Re: [PR fortran/63865] OpenACC cache directive: match Fortran support with C/C++ (was: [gomp4] OpenACC cache directive maintenance)

2015-10-27 Thread Jakub Jelinek
On Tue, Oct 27, 2015 at 04:19:49PM +0100, Thomas Schwinge wrote:
> On Wed, 05 Nov 2014 17:44:58 +0100, I wrote:
> > On Wed, 05 Nov 2014 17:36:46 +0100, I wrote:
> > > In r217146, I applied the following to gomp-4_0-branch:
> > > 
> > > [OpenACC cache directive maintenance in C/C++]
> 
> > I also tried to make this work for Fortran, but didn't manage to (in
> > a reasonable amount of time, which has not been a lot that I allocated)
> > ;-) -- would you please have a look at this (but it's not urgent).
> > 
> > [WIP patch]
> 
> That never got resolved, so I've now done it myself, directly for trunk.
> OK to commit?

Ok.

Jakub


Re: [PATCH] Pass manager: add support for termination of pass list

2015-10-27 Thread Martin Liška
On 10/27/2015 03:49 PM, Richard Biener wrote:
> On Tue, Oct 27, 2015 at 1:36 PM, Martin Liška  wrote:
>> On 10/26/2015 02:48 PM, Richard Biener wrote:
>>> On Thu, Oct 22, 2015 at 1:02 PM, Martin Liška  wrote:
 On 10/21/2015 04:06 PM, Richard Biener wrote:
> On Wed, Oct 21, 2015 at 1:24 PM, Martin Liška  wrote:
>> On 10/21/2015 11:59 AM, Richard Biener wrote:
>>> On Wed, Oct 21, 2015 at 11:19 AM, Martin Liška  wrote:
 On 10/20/2015 03:39 PM, Richard Biener wrote:
> On Tue, Oct 20, 2015 at 3:00 PM, Martin Liška  wrote:
>> Hello.
>>
>> As part of upcoming merge of HSA branch, we would like to have 
>> possibility to terminate
>> pass manager after execution of the HSA generation pass. The HSA 
>> back-end is implemented
>> as a tree pass that directly emits HSAIL from gimple tree 
>> representation. The pass operates
>> on clones created by HSA IPA pass and the pass manager should stop 
>> execution of further
>> RTL passes.
>>
>> Suggested patch survives bootstrap and regression tests on 
>> x86_64-linux-pc.
>>
>> What do you think about it?
>
> Are you sure it works this way?
>
> Btw, you will miss executing of all the cleanup passes that will
> eventually free memory
> associated with the function.  So I'd rather support a
> TODO_discard_function which
> should basically release the body from the cgraph.

 Hi.

 Agree with you that I should execute all TODOs, which can be easily 
 done.
 However, if I just try to introduce the suggested TODO and handle it 
 properly
 by calling cgraph_node::release_body, then for instance fn->gimple_df, 
 fn->cfg are
 released and I hit ICEs on many places.

 Stopping the pass manager looks necessary, or do I miss something?
>>>
>>> "Stopping the pass manager" is necessary after TODO_discard_function, 
>>> yes.
>>> But that may be simply done via a has_body () check then?
>>
>> Thanks, there's second version of the patch. I'm going to start 
>> regression tests.
>
> As release_body () will free cfun you should pop_cfun () before
> calling it (and thus

 Well, release_function_body calls both push & pop, so I think calling pop
 before cgraph_node::release_body is not necessary.
>>>
>>> (ugh).
>>>
 If tried to call pop_cfun before cgraph_node::release_body, I have cfun 
 still
 pointing to the same (function *) (which is gcc_freed, but cfun != NULL).
>>>
>>> Hmm, I meant to call pop_cfun then after it (unless you want to fix the 
>>> above,
>>> none of the freeing functions should techincally need 'cfun', just add
>>> 'fn' parameters ...).
>>>
>>> I expected pop_cfun to eventually set cfun to NULL if it popped the
>>> "last" cfun.  Why
>>> doesn't it do that?
>>>
> drop its modification).  Also TODO_discard_functiuon should be only set 
> for
> local passes thus you should probably add a gcc_assert (cfun).
> I'd move its handling earlier, definitely before the ggc_collect, 
> eventually
> before the pass_fini_dump_file () (do we want a last dump of the
> function or not?).

 Fully agree, moved here.

>
> @@ -2397,6 +2410,10 @@ execute_pass_list_1 (opt_pass *pass)
>  {
>gcc_assert (pass->type == GIMPLE_PASS
>   || pass->type == RTL_PASS);
> +
> +
> +  if (!gimple_has_body_p (current_function_decl))
> +   return;
>
> too much vertical space.  With popping cfun before releasing the body the 
> check
> might just become if (!cfun) and

 As mentioned above, as release body is symmetric (calling push & pop), the 
 suggested
 guard will not work.
>>>
>>> I suggest to fix it.  If it calls push/pop it should leave with the
>>> original cfun, popping again
>>> should make it NULL?
>>>
>
> @@ -2409,7 +2426,7 @@ execute_pass_list (function *fn, opt_pass *pass)
>  {
>push_cfun (fn);
>execute_pass_list_1 (pass);
> -  if (fn->cfg)
> +  if (gimple_has_body_p (current_function_decl) && fn->cfg)
>  {
>free_dominance_info (CDI_DOMINATORS);
>free_dominance_info (CDI_POST_DOMINATORS);
>
> here you'd need to guard the pop_cfun call on cfun != NULL.  IMHO it's 
> better
> to not let cfun point to deallocated data.

 As I've read the code, since we gcc_free 'struct function', one can just 
 rely on
 gimple_has_body_p (current_function_decl) as it correctly realizes that
 DECL_STRUCT_FUNCTION (current_function_decl) == NULL.
>>>
>>> I'm sure that with some GC checking ggc_free makes them #deadbeef or so:
>>>
>>> void
>>> ggc_free (void *p)
>>> {
>>> ...
>>> #ifdef ENABLE_GC_CHECK

Re: [AArch64][dejagnu][PATCH 5/7] Dejagnu support for ARMv8.1 Adv.SIMD.

2015-10-27 Thread Matthew Wahab

On 24/10/15 08:16, Bernhard Reutner-Fischer wrote:

On October 23, 2015 2:24:26 PM GMT+02:00, Matthew Wahab 
 wrote:

The ARMv8.1 architecture extension adds two Adv.SIMD instructions,.
This
patch adds support in Dejagnu for ARMv8.1 Adv.SIMD specifiers and
checks.

The new test options are
- { dg-add-options arm_v8_1a_neon }: Add compiler options needed to
   enable ARMv8.1 Adv.SIMD.
- { dg-require-effective-target arm_v8_1a_neon_hw }: Require a target
   capable of executing ARMv8.1 Adv.SIMD instructions.



Please error with something more meaningful than FOO, !__ARM_FEATURE_QRDMX 
comes to mind.

TIA,



I've reworked the patch so that the error is "__ARM_FEATURE_QRDMX not
defined" and also strengthened the check_effective_target tests.

Retested for aarch64-none-elf with cross-compiled check-gcc on an
ARMv8.1 emulator. Also tested with a version of the compiler that
doesn't define the ACLE feature macro.

Matthew

gcc/testsuite
2015-10-27  Matthew Wahab  

* lib/target-supports.exp (add_options_for_arm_v8_1a_neon): New.
(check_effective_target_arm_arch_FUNC_ok)
(add_options_for_arm_arch_FUNC)
(check_effective_target_arm_arch_FUNC_multilib): Add "armv8.1-a"
to the list to be generated.
(check_effective_target_arm_v8_1a_neon_ok_nocache): New.
(check_effective_target_arm_v8_1a_neon_ok): New.
(check_effective_target_arm_v8_1a_neon_hw): New.


>From b12969882298cb79737e882c48398c58a45161b9 Mon Sep 17 00:00:00 2001
From: Matthew Wahab 
Date: Mon, 26 Oct 2015 14:58:36 +
Subject: [PATCH 5/7] [Testsuite] Add dejagnu options for armv8.1 neon

Change-Id: Ib58b8c4930ad3971af3ea682eda043e14cd2e8b3
---
 gcc/testsuite/lib/target-supports.exp | 56 ++-
 1 file changed, 55 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 4d5b0a3d..0fb679d 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2700,6 +2700,16 @@ proc add_options_for_arm_v8_neon { flags } {
 return "$flags $et_arm_v8_neon_flags -march=armv8-a"
 }
 
+# Add the options needed for ARMv8.1 Adv.SIMD.
+
+proc add_options_for_arm_v8_1a_neon { flags } {
+if { [istarget aarch64*-*-*] } {
+	return "$flags -march=armv8.1-a"
+} else {
+	return "$flags"
+}
+}
+
 proc add_options_for_arm_crc { flags } {
 if { ! [check_effective_target_arm_crc_ok] } {
 return "$flags"
@@ -2984,7 +2994,8 @@ foreach { armfunc armflag armdef } { v4 "-march=armv4 -marm" __ARM_ARCH_4__
  v7r "-march=armv7-r" __ARM_ARCH_7R__
  v7m "-march=armv7-m -mthumb" __ARM_ARCH_7M__
  v7em "-march=armv7e-m -mthumb" __ARM_ARCH_7EM__
- v8a "-march=armv8-a" __ARM_ARCH_8A__ } {
+ v8a "-march=armv8-a" __ARM_ARCH_8A__
+ v8_1a "-march=armv8.1a" __ARM_ARCH_8A__ } {
 eval [string map [list FUNC $armfunc FLAG $armflag DEF $armdef ] {
 	proc check_effective_target_arm_arch_FUNC_ok { } {
 	if { [ string match "*-marm*" "FLAG" ] &&
@@ -3141,6 +3152,25 @@ proc check_effective_target_arm_neonv2_hw { } {
 } [add_options_for_arm_neonv2 ""]]
 }
 
+# Return 1 if the target supports the ARMv8.1 Adv.SIMD extension, 0
+# otherwise.  The test is valid for AArch64.
+
+proc check_effective_target_arm_v8_1a_neon_ok_nocache { } {
+if { ![istarget aarch64*-*-*] } {
+	return 0
+}
+return [check_no_compiler_messages_nocache arm_v8_1a_neon_ok assembly {
+	#if !defined (__ARM_FEATURE_QRDMX)
+	#error "__ARM_FEATURE_QRDMX not defined"
+	#endif
+} [add_options_for_arm_v8_1a_neon ""]]
+}
+
+proc check_effective_target_arm_v8_1a_neon_ok { } {
+return [check_cached_effective_target arm_v8_1a_neon_ok \
+		check_effective_target_arm_v8_1a_neon_ok_nocache]
+}
+
 # Return 1 if the target supports executing ARMv8 NEON instructions, 0
 # otherwise.
 
@@ -3159,6 +3189,30 @@ proc check_effective_target_arm_v8_neon_hw { } {
 } [add_options_for_arm_v8_neon ""]]
 }
 
+# Return 1 if the target supports executing the ARMv8.1 Adv.SIMD extension, 0
+# otherwise.  The test is valid for AArch64.
+
+proc check_effective_target_arm_v8_1a_neon_hw { } {
+if { ![check_effective_target_arm_v8_1a_neon_ok] } {
+	return 0;
+}
+return [check_runtime_nocache arm_v8_1a_neon_hw_available {
+	int
+	main (void)
+	{
+	  long long a = 0, b = 1;
+	  long long result = 0;
+
+	  asm ("sqrdmlah %s0,%s1,%s2"
+	   : "=w"(result)
+	   : "w"(a), "w"(b)
+	   : /* No clobbers.  */);
+
+	  return result;
+	}
+}  [add_options_for_arm_v8_1a_neon ""]]
+}
+
 # Return 1 if this is a ARM target with NEON enabled.
 
 proc check_effective_target_arm_neon { } {
-- 
2.1.4



Re: Move some comparison simplifications to match.pd

2015-10-27 Thread Kyrill Tkachov

Hi Marc,

On 30/08/15 08:57, Marc Glisse wrote:

Hello,

just trying to shrink fold-const.c a bit more.

initializer_zerop is close to what I was looking for with zerop, but I
wasn't sure if it would be safe (it accepts some CONSTRUCTOR and
STRING_CST). At some point I tried using sign_bit_p, but using the return
of that function in the simplification confused the machinery too much. I
added an "overload" of element_precision like the one in element_mode, for
convenience.

Bootstrap+testsuite on ppc64le-redhat-linux.


2015-08-31  Marc Glisse  

gcc/
* tree.h (zerop): New function.
* tree.c (zerop): Likewise.
(element_precision): Handle expressions.
* match.pd (define_predicates): Add zerop.
(x <= +Inf): Fix comment.
(abs (x) == 0, A & C == C, A & C != 0): Converted from ...
* fold-const.c (fold_binary_loc): ... here. Remove.

gcc/testsuite/
* gcc.dg/tree-ssa/cmp-1.c: New file.



+/* If we have (A & C) != 0 where C is the sign bit of A, convert
+   this into A < 0.  Similarly for (A & C) == 0 into A >= 0.  */
+(for cmp (eq ne)
+ ncmp (ge lt)
+ (simplify
+  (cmp (bit_and (convert?@2 @0) integer_pow2p@1) integer_zerop)
+  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+   && (TYPE_PRECISION (TREE_TYPE (@0))
+  == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (@0
+   && element_precision (@2) >= element_precision (@0)
+   && wi::only_sign_bit_p (@1, element_precision (@0)))
+   (with { tree stype = signed_type_for (TREE_TYPE (@0)); }
+(ncmp (convert:stype @0) { build_zero_cst (stype); })
+

With this patch and this pattern pattern in particular I've seen some code 
quality regressions on aarch64.
I'm still trying to reduce a testcase to demonstrate the issue but it seems to 
involve
intorucing extra conversions from unsigned to signed values. If I gate this 
pattern on
!TYPE_UNSIGNED (TREE_TYPE (@0)) the codegen seems to improve.

Any thoughts?
Thanks,
Kyrill




Re: PING: [PATCH] PR target/67215: -fno-plt needs improvements for x86

2015-10-27 Thread H.J. Lu
On Tue, Oct 27, 2015 at 8:26 AM, Jiong Wang  wrote:
>
>
> On 27/10/15 14:50, H.J. Lu wrote:
>>
>> On Tue, Oct 27, 2015 at 7:34 AM, Ramana Radhakrishnan
>>  wrote:

 OK, then it's fairly x86-64 specific optimization, because we can't do
 "call *mem" in
 aarch64 and some other targets.
>>>
>>> It is a fairly x86_64 specific optimization and doesn't apply to AArch64.
>>>
>>> The question really is what impact does removing the generic code
>>> handling have on aarch64 - is it a no-op or not for the existing -fno-plt
>>> implementation in the AArch64 backend ? The only case that is of interest is
>>> the bit below in calls.c and it looks like that may well be redundant with
>>> the logic in the backend already, but I have not done the full analysis to
>>> convince myself that the code in the backend is sufficient.
>>>
>>> -  && (!flag_plt
>>> -  || lookup_attribute ("noplt", DECL_ATTRIBUTES
>>> (fndecl_or_type)))
>>> -  && !targetm.binds_local_p (fndecl_or_type))
>>>
>> -fno-plt is a backend specific optimization and should be handled
>> in backend.
>>
>
> The removing of those generic code has broken aarch64.
>

Can you handle -fno-plt in aarch64 call expander?

-- 
H.J.


Re: more accurate error messages omp in fortran

2015-10-27 Thread Cesar Philippidis
(was "Re: more accurate omp in fortran"

Ping.

Cesar

On 10/22/2015 08:21 AM, Cesar Philippidis wrote:
> Currently, for certain omp and oacc errors the fortran will inaccurately
> report exactly where in the omp/acc construct the error has occurred. E.g.
> 
>!$acc parallel copy (i) copy (i) copy (j)
>1
> Error: Symbol ‘i’ present on multiple clauses at (1)
> 
> instead of
> 
>!$acc parallel copy (i) copy (i) copy (j)
> 1
> Error: Symbol ‘i’ present on multiple clauses at (1)
> 
> The problem here is how the front end uses the locus for the construct
> and not the individual clause. As a result that diagnostic pointer
> points to the end of the construct.
> 
> This patch teaches gfc_resolve_omp_clauses how to use the locus of each
> individual clause instead of the construct when reporting errors
> involving OMP_LIST_ clauses (which are typically clauses involving
> variables). It's still not perfect, but it does improve the quality of
> the error reporting a little. In particular, in openacc, other compilers
> are somewhat lenient in allowing variables to appear in multiple
> clauses, e.g. copyin (foo) copyout (foo), but this is clearly forbidden
> by the spec. I received some bug reports complaining that gfortran's
> errors aren't accurate.
> 
> I've also split off the check for variables appearing in multiple
> clauses into a separate function. It's a little overkill for trunk right
> now, but it is used quite a bit in gomp4 for oacc declare.
> 
> I've tested these changes on x86_64. Is this ok for trunk?
> 
> Cesar
> 
> 



Re: [PATCH] Add -fchecking

2015-10-27 Thread Mikhail Maltsev
On 10/27/2015 04:17 PM, Richard Biener wrote:
> 
> This adds -fchecking as a way to enable internal consistency checks
> even in release builds (or disable checking with -fno-checking - up to
> a certain extent - with checking enabled).

I remember that Jakub proposed to use __builtin_expect with
flag_checking. I wonder, if it is possible to implement without hacking
AWK scripts just for this particular case? For example, to define
flag_checking to something like

#define flag_checking __builtin_expect (flag_checking_val, CHECKING_P)

(provided that flag_checking_val is the actual value got from
command-line options).

-- 
Regards,
Mikhail Maltsev


Re: PING: [PATCH] PR target/67215: -fno-plt needs improvements for x86

2015-10-27 Thread Ramana Radhakrishnan


On 27/10/15 14:50, H.J. Lu wrote:
> On Tue, Oct 27, 2015 at 7:34 AM, Ramana Radhakrishnan
>  wrote:
>>
>>>
>>> OK, then it's fairly x86-64 specific optimization, because we can't do 
>>> "call *mem" in
>>> aarch64 and some other targets.
>>
>> It is a fairly x86_64 specific optimization and doesn't apply to AArch64.
>>
>> The question really is what impact does removing the generic code handling 
>> have on aarch64 - is it a no-op or not for the existing -fno-plt 
>> implementation in the AArch64 backend ? The only case that is of interest is 
>> the bit below in calls.c and it looks like that may well be redundant with 
>> the logic in the backend already, but I have not done the full analysis to 
>> convince myself that the code in the backend is sufficient.
>>
>> -  && (!flag_plt
>> -  || lookup_attribute ("noplt", DECL_ATTRIBUTES 
>> (fndecl_or_type)))
>> -  && !targetm.binds_local_p (fndecl_or_type))
>>
> 
> -fno-plt is a backend specific optimization and should be handled
> in backend.
> 

HJ, Thanks for committing the change even when we were discussing the change - 
As I suspected the handling in the backend isn't sufficient, the call expanders 
need to handle this case in the AArch64 backend. Minimally tested -

Ok if no regressions on aarch64-none-elf?

regards
Ramana

* config/aarch64/aarch64.md (call, call_value): Handle noplt.


Re: Re: [Bulk] [OpenACC 0/7] host_data construct

2015-10-27 Thread Cesar Philippidis
On 10/26/2015 11:34 AM, Jakub Jelinek wrote:
> On Fri, Oct 23, 2015 at 10:51:42AM -0500, James Norris wrote:
>> @@ -12942,6 +12961,7 @@ c_finish_omp_clauses (tree clauses, bool is_omp, 
>> bool declare_simd)
>>  case OMP_CLAUSE_GANG:
>>  case OMP_CLAUSE_WORKER:
>>  case OMP_CLAUSE_VECTOR:
>> +case OMP_CLAUSE_USE_DEVICE:
>>pc = &OMP_CLAUSE_CHAIN (c);
>>continue;
>>  
> 
> Are there any restrictions on whether you can specify the same var multiple
> times in use_device clause?
> #pragma acc host_data use_device (x) use_device (x) use_device (y, y, y)
> ?
> If not, have you verified that the gimplifier doesn't ICE on it?  Generally
> it doesn't like the same var being mentioned multiple times.
> If yes, you can use e.g. the generic_head bitmap for that and in any case,
> cover that with sufficient testsuite coverage.

Generally variables cannot appear in multiple clauses. I'll add more
testing for this.

>> diff --git a/gcc/gimplify.c b/gcc/gimplify.c
>> index ab9e540..0c32219 100644
>> --- a/gcc/gimplify.c
>> +++ b/gcc/gimplify.c
>> @@ -93,6 +93,8 @@ enum gimplify_omp_var_data
>>  
>>GOVD_MAP_0LEN_ARRAY = 32768,
>>  
>> +  GOVD_USE_DEVICE = 65536,
>> +
>>GOVD_DATA_SHARE_CLASS = (GOVD_SHARED | GOVD_PRIVATE | GOVD_FIRSTPRIVATE
>> | GOVD_LASTPRIVATE | GOVD_REDUCTION | GOVD_LINEAR
>> | GOVD_LOCAL)
>> @@ -116,7 +118,9 @@ enum omp_region_type
>>ORT_COMBINED_TARGET = 33,
>>/* Dummy OpenMP region, used to disable expansion of
>>   DECL_VALUE_EXPRs in taskloop pre body.  */
>> -  ORT_NONE = 64
>> +  ORT_NONE = 64,
>> +  /* An OpenACC host-data region.  */
>> +  ORT_HOST_DATA = 128
> 
> I'd prefer ORT_NONE to be the last one, can you just renumber it and put
> ORT_HOST_DATA before it?

OK.

>> +static tree
>> +gimplify_oacc_host_data_1 (tree *tp, int *walk_subtrees,
>> +   void *data ATTRIBUTE_UNUSED)
>> +{
> 
> Your use_device sounds very similar to use_device_ptr clause in OpenMP,
> which is allowed on #pragma omp target data construct and is implemented
> quite a bit differently from this; it is unclear if the OpenACC standard
> requires this kind of implementation, or you just chose to implement it this
> way.  In particular, the GOMP_target_data call puts the variables mentioned
> in the use_device_ptr clauses into the mapping structures (similarly how
> map clause appears) and the corresponding vars are privatized within the
> target data region (which is a host region, basically a fancy { } braces),
> where the private variables contain the offloading device's pointers.

Is this a new OpenMP 4.5 feature? I'll take a closer look and see if
they are similar enough. I also noticed that OpenMP 4.5 has something
similar to OpenACC's enter/exit data construct now.

>> +  splay_tree_node n = NULL;
>> +  location_t loc = EXPR_LOCATION (*tp);
>> +
>> +  switch (TREE_CODE (*tp))
>> +{
>> +case ADDR_EXPR:
>> +  {
>> +tree decl = TREE_OPERAND (*tp, 0);
>> +
>> +switch (TREE_CODE (decl))
>> +  {
>> +  case ARRAY_REF:
>> +  case ARRAY_RANGE_REF:
>> +  case COMPONENT_REF:
>> +  case VIEW_CONVERT_EXPR:
>> +  case REALPART_EXPR:
>> +  case IMAGPART_EXPR:
>> +if (TREE_CODE (TREE_OPERAND (decl, 0)) == VAR_DECL)
>> +  n = splay_tree_lookup (gimplify_omp_ctxp->variables,
>> + (splay_tree_key) TREE_OPERAND (decl, 0));
>> +break;
> 
> I must say this looks really strange, you throw away all the offsets
> embedded in the component codes (fixed or variable).
> Where comes the above list?  What about other components (say bit field refs,
> etc.)?

I'm not sure. This is one of those things where multiple developers
worked on it, and the history got lost. I'll investigate it.

>> +case VAR_DECL:
> 
> What is so special about VAR_DECLs?  Shouldn't PARM_DECLs / RESULT_DECLs
> be treated the same way?
>> --- a/libgomp/libgomp.map
>> +++ b/libgomp/libgomp.map
>> @@ -378,6 +378,7 @@ GOACC_2.0 {
>>  GOACC_wait;
>>  GOACC_get_thread_num;
>>  GOACC_get_num_threads;
>> +GOACC_deviceptr;
>>  };
>>  
>>  GOACC_2.0.1 {
> 
> You shouldn't be adding new symbols into a symbol version that appeared in a
> compiler that shipped already (GCC 5 already had GOACC_2.0 symbols).
> So it should go into GOACC_2.0.1.

OK.

>> diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c
>> index af067d6..497ab92 100644
>> --- a/libgomp/oacc-mem.c
>> +++ b/libgomp/oacc-mem.c
>> @@ -204,6 +204,38 @@ acc_deviceptr (void *h)
>>return d;
>>  }
>>  
>> +/* This function is used as a helper in generated code to implement pointer
>> +   lookup in host_data regions.  Unlike acc_deviceptr, it returns its 
>> argument
>> +   unchanged on a shared-memory system (e.g. the host).  */
>> +
>> +void *
>> +GOACC_deviceptr (void *h)
>> +{
>> +  splay_tree_key n;
>> +  void *d;
>> +  void *offset;
>> +
>> +  goacc_lazy_initialize ();
>> +
>> +  st

Re: Move some comparison simplifications to match.pd

2015-10-27 Thread Marc Glisse

On Tue, 27 Oct 2015, Kyrill Tkachov wrote:


Hi Marc,

On 30/08/15 08:57, Marc Glisse wrote:

Hello,

just trying to shrink fold-const.c a bit more.

initializer_zerop is close to what I was looking for with zerop, but I
wasn't sure if it would be safe (it accepts some CONSTRUCTOR and
STRING_CST). At some point I tried using sign_bit_p, but using the return
of that function in the simplification confused the machinery too much. I
added an "overload" of element_precision like the one in element_mode, for
convenience.

Bootstrap+testsuite on ppc64le-redhat-linux.


2015-08-31  Marc Glisse  

gcc/
* tree.h (zerop): New function.
* tree.c (zerop): Likewise.
(element_precision): Handle expressions.
* match.pd (define_predicates): Add zerop.
(x <= +Inf): Fix comment.
(abs (x) == 0, A & C == C, A & C != 0): Converted from ...
* fold-const.c (fold_binary_loc): ... here. Remove.

gcc/testsuite/
* gcc.dg/tree-ssa/cmp-1.c: New file.



+/* If we have (A & C) != 0 where C is the sign bit of A, convert
+   this into A < 0.  Similarly for (A & C) == 0 into A >= 0.  */
+(for cmp (eq ne)
+ ncmp (ge lt)
+ (simplify
+  (cmp (bit_and (convert?@2 @0) integer_pow2p@1) integer_zerop)
+  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+   && (TYPE_PRECISION (TREE_TYPE (@0))
+  == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (@0
+   && element_precision (@2) >= element_precision (@0)
+   && wi::only_sign_bit_p (@1, element_precision (@0)))


This condition is a bit strict when @0 is signed, for an int32_t i, (i & 
(5LL << 42)) != 0 would work as well thanks to sign extension.



+   (with { tree stype = signed_type_for (TREE_TYPE (@0)); }
+(ncmp (convert:stype @0) { build_zero_cst (stype); })
+

With this patch and this pattern pattern in particular I've seen some code 
quality regressions on aarch64.
I'm still trying to reduce a testcase to demonstrate the issue but it seems 
to involve
intorucing extra conversions from unsigned to signed values. If I gate this 
pattern on

!TYPE_UNSIGNED (TREE_TYPE (@0)) the codegen seems to improve.

Any thoughts?


Hmm, my first thoughts would be:
* conversion from unsigned to signed is a NOP,
* if a & signbit != 0 is faster to compute than a < 0, that's how a < 0 
should be expanded by the target,
* so the problem is probably something else, maybe the bit_and combined 
better with another operation, or the cast obfuscates things enough to 
confuse a later pass. Or maybe something related to @2.


An example would help understand what we are talking about...

--
Marc Glisse


Re: Move some comparison simplifications to match.pd

2015-10-27 Thread Kyrill Tkachov


On 27/10/15 15:57, Marc Glisse wrote:

On Tue, 27 Oct 2015, Kyrill Tkachov wrote:


Hi Marc,

On 30/08/15 08:57, Marc Glisse wrote:

Hello,

just trying to shrink fold-const.c a bit more.

initializer_zerop is close to what I was looking for with zerop, but I
wasn't sure if it would be safe (it accepts some CONSTRUCTOR and
STRING_CST). At some point I tried using sign_bit_p, but using the return
of that function in the simplification confused the machinery too much. I
added an "overload" of element_precision like the one in element_mode, for
convenience.

Bootstrap+testsuite on ppc64le-redhat-linux.


2015-08-31  Marc Glisse  

gcc/
  * tree.h (zerop): New function.
  * tree.c (zerop): Likewise.
  (element_precision): Handle expressions.
  * match.pd (define_predicates): Add zerop.
  (x <= +Inf): Fix comment.
  (abs (x) == 0, A & C == C, A & C != 0): Converted from ...
  * fold-const.c (fold_binary_loc): ... here. Remove.

gcc/testsuite/
  * gcc.dg/tree-ssa/cmp-1.c: New file.



+/* If we have (A & C) != 0 where C is the sign bit of A, convert
+   this into A < 0.  Similarly for (A & C) == 0 into A >= 0.  */
+(for cmp (eq ne)
+ ncmp (ge lt)
+ (simplify
+  (cmp (bit_and (convert?@2 @0) integer_pow2p@1) integer_zerop)
+  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+   && (TYPE_PRECISION (TREE_TYPE (@0))
+   == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (@0
+   && element_precision (@2) >= element_precision (@0)
+   && wi::only_sign_bit_p (@1, element_precision (@0)))


This condition is a bit strict when @0 is signed, for an int32_t i, (i & (5LL 
<< 42)) != 0 would work as well thanks to sign extension.


+   (with { tree stype = signed_type_for (TREE_TYPE (@0)); }
+(ncmp (convert:stype @0) { build_zero_cst (stype); })
+

With this patch and this pattern pattern in particular I've seen some code 
quality regressions on aarch64.
I'm still trying to reduce a testcase to demonstrate the issue but it seems to 
involve
intorucing extra conversions from unsigned to signed values. If I gate this 
pattern on
!TYPE_UNSIGNED (TREE_TYPE (@0)) the codegen seems to improve.

Any thoughts?


Hmm, my first thoughts would be:
* conversion from unsigned to signed is a NOP,
* if a & signbit != 0 is faster to compute than a < 0, that's how a < 0 should 
be expanded by the target,
* so the problem is probably something else, maybe the bit_and combined better 
with another operation, or the cast obfuscates things enough to confuse a later 
pass. Or maybe something related to @2.



Thanks,
So here the types are shorts and unsigned shorts. On aarch64 these are HImode 
values and there's no direct arithmetic
operations on them, so they have to be extended to SImode and truncated back.


An example would help understand what we are talking about...


Working on it. I'm trying to introduce gcc_unreachable calls into the compiler 
when the bad situation happens
and reducing the original file, but I think I'm not capturing the conditions 
that trigger this behaviour
exactly right :(

Kyrill




Re: [AArch64][PATCH 2/7] Add sqrdmah, sqrdmsh instructions.

2015-10-27 Thread Matthew Wahab

On 27/10/15 11:18, James Greenhalgh wrote:


  ;; ---
@@ -932,6 +934,8 @@
 UNSPEC_SQSHRN UNSPEC_UQSHRN
 UNSPEC_SQRSHRN UNSPEC_UQRSHRN])

+(define_int_iterator SQRDMLAH [UNSPEC_SQRDMLAH UNSPEC_SQRDMLSH])
+


This name does not make it clear that you will iterate over an "A" and an
"S" form. I'd like to see a clearer naming choice, RDMAS? SQRDMLHADDSUB? etc.


SQRDMLHADDSUB is a little difficult to read. How about SQRDMLH_AS, to keep the link 
to the instruction?


Matthew




Re: [AArch64][PATCH 2/7] Add sqrdmah, sqrdmsh instructions.

2015-10-27 Thread James Greenhalgh
On Tue, Oct 27, 2015 at 04:11:07PM +, Matthew Wahab wrote:
> On 27/10/15 11:18, James Greenhalgh wrote:
> 
> >>  ;; ---
> >>@@ -932,6 +934,8 @@
> >> UNSPEC_SQSHRN UNSPEC_UQSHRN
> >> UNSPEC_SQRSHRN UNSPEC_UQRSHRN])
> >>
> >>+(define_int_iterator SQRDMLAH [UNSPEC_SQRDMLAH UNSPEC_SQRDMLSH])
> >>+
> >
> >This name does not make it clear that you will iterate over an "A" and an
> >"S" form. I'd like to see a clearer naming choice, RDMAS? SQRDMLHADDSUB? etc.
> 
> SQRDMLHADDSUB is a little difficult to read. How about SQRDMLH_AS,
> to keep the link to the instruction?

Sounds good to me.

Thanks,
James



Re: Move some comparison simplifications to match.pd

2015-10-27 Thread Marc Glisse

On Tue, 27 Oct 2015, Kyrill Tkachov wrote:

So here the types are shorts and unsigned shorts. On aarch64 these are 
HImode values and there's no direct arithmetic operations on them, so 
they have to be extended to SImode and truncated back.


Ah ok, that makes sense. I expect it is in the case where @0 is shorter 
than a word and @2 is longer than @0. I wouldn't expect the signedness to 
matter that much, but maybe one of sign/zero-extension simplifies better 
than the other.


This problem would happen because we are still missing a pass that handles 
type promotion.


It means we could consider gating the transformation by the existence of 
lt in the mode of @0, but I don't see any query of optabs in match.pd 
yet...


Still, a testcase would be necessary to go with whatever patch, and I may 
be completely on the wrong track.


--
Marc Glisse


[PATCH, committed] PR fortran/68108 -- Check for valid array ref.

2015-10-27 Thread Steve Kargl
I've committed the attached patch after testing on
x86_64-*-freebsd.  A regression was introduced by 
my fix for PR fortran/67805, which failed to check
for a valid array ref.  Note, Mikael approved the 
patch in the PR audit trail.

2015-10-27  Steven G. Kargl  

PR fortran/68108
* decl.c (char_len_param_value): Check for REF_ARRAY.
 
2015-10-27  Steven G. Kargl  

PR fortran/68108
* gfortran.dg/pr67805_2.f90: New test.

-- 
Steve
Index: gcc/fortran/decl.c
===
--- gcc/fortran/decl.c	(revision 229445)
+++ gcc/fortran/decl.c	(working copy)
@@ -748,13 +748,15 @@ char_len_param_value (gfc_expr **expr, b
 
   /* This catches the invalid code "[character(m(2:3)) :: 'x', 'y']",
 	 which causes an ICE if gfc_reduce_init_expr() is called.  */
-  if (e->ref && e->ref->u.ar.type == AR_UNKNOWN
+  if (e->ref && e->ref->type == REF_ARRAY
+	  && e->ref->u.ar.type == AR_UNKNOWN
 	  && e->ref->u.ar.dimen_type[0] == DIMEN_RANGE)
 	goto syntax;
 
   gfc_reduce_init_expr (e);
 
-  if ((e->ref && e->ref->u.ar.type != AR_ELEMENT) 
+  if ((e->ref && e->ref->type == REF_ARRAY
+	   && e->ref->u.ar.type != AR_ELEMENT) 
 	  || (!e->ref && e->expr_type == EXPR_ARRAY))
 	{
 	  gfc_free_expr (e);
Index: gcc/testsuite/gfortran.dg/pr67805_2.f90
===
--- gcc/testsuite/gfortran.dg/pr67805_2.f90	(revision 0)
+++ gcc/testsuite/gfortran.dg/pr67805_2.f90	(working copy)
@@ -0,0 +1,22 @@
+! { dg-do compile }
+! PR fortran/68108
+! Code contributed by Juergen Reuter (juergen.reuter at desy dot de)
+! Test fix for regression caused by PR fortran/67805.
+module lexers
+  implicit none
+  type :: template_t
+ character(256) :: charset1
+ integer :: len1
+  end type template_t
+
+contains
+
+  subroutine match_quoted (tt, s, n)
+type(template_t), intent(in) :: tt
+character(*), intent(in) :: s
+integer, intent(out) :: n
+character(tt%len1) :: ch1
+ch1 = tt%charset1
+  end subroutine match_quoted
+
+end module lexers


Re: [PR fortran/63865] OpenACC cache directive: match Fortran support with C/C++

2015-10-27 Thread Thomas Schwinge
Hi!

On Tue, 27 Oct 2015 16:26:54 +0100, Jakub Jelinek  wrote:
> On Tue, Oct 27, 2015 at 04:19:49PM +0100, Thomas Schwinge wrote:
> > On Wed, 05 Nov 2014 17:44:58 +0100, I wrote:
> > > On Wed, 05 Nov 2014 17:36:46 +0100, I wrote:
> > > > In r217146, I applied the following to gomp-4_0-branch:
> > > > 
> > > > [OpenACC cache directive maintenance in C/C++]
> > 
> > > I also tried to make this work for Fortran, but didn't manage to (in
> > > a reasonable amount of time, which has not been a lot that I allocated)
> > > ;-) -- would you please have a look at this (but it's not urgent).
> > > 
> > > [WIP patch]
> > 
> > That never got resolved, so I've now done it myself, directly for trunk.
> > OK to commit?
> 
> Ok.

Thanks for the speedy review, committed in r229448:

commit 09382f4e948e567fd47a518eeb5484d848898753
Author: tschwinge 
Date:   Tue Oct 27 16:54:52 2015 +

[PR fortran/63865] OpenACC cache directive: match Fortran support with C/C++

gcc/fortran/
PR fortran/63865
* openmp.c (resolve_oacc_cache): Remove function.
(gfc_match_oacc_cache): Enable array sections.
(resolve_omp_clauses, gfc_resolve_oacc_directive): Change
accordingly.
* trans-openmp.c (gfc_trans_omp_clauses): Likewise.
gcc/testsuite/
PR fortran/63865
* gfortran.dg/goacc/coarray.f95: Expect the OpenACC cache
directive to work.
* gfortran.dg/goacc/loop-1.f95: Likewise.
* gfortran.dg/goacc/cache-1.f95: Likewise, and extend testing.
* gfortran.dg/goacc/cray.f95: Likewise.
* gfortran.dg/goacc/parameter.f95: Likewise.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@229448 
138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/fortran/ChangeLog | 10 ++
 gcc/fortran/openmp.c  | 16 
 gcc/fortran/trans-openmp.c| 22 --
 gcc/testsuite/ChangeLog   | 11 +++
 gcc/testsuite/gfortran.dg/goacc/cache-1.f95   |  9 +++--
 gcc/testsuite/gfortran.dg/goacc/coarray.f95   |  3 ++-
 gcc/testsuite/gfortran.dg/goacc/cray.f95  |  4 +---
 gcc/testsuite/gfortran.dg/goacc/loop-1.f95|  1 -
 gcc/testsuite/gfortran.dg/goacc/parameter.f95 |  3 +--
 9 files changed, 52 insertions(+), 27 deletions(-)

diff --git gcc/fortran/ChangeLog gcc/fortran/ChangeLog
index 37956ce..02564ce 100644
--- gcc/fortran/ChangeLog
+++ gcc/fortran/ChangeLog
@@ -1,3 +1,13 @@
+2015-10-27  Thomas Schwinge  
+   James Norris  
+
+   PR fortran/63865
+   * openmp.c (resolve_oacc_cache): Remove function.
+   (gfc_match_oacc_cache): Enable array sections.
+   (resolve_omp_clauses, gfc_resolve_oacc_directive): Change
+   accordingly.
+   * trans-openmp.c (gfc_trans_omp_clauses): Likewise.
+
 2015-10-27  Steven G. Kargl  
 
PR fortran/68108
diff --git gcc/fortran/openmp.c gcc/fortran/openmp.c
index 3c12d8e..6c78c97 100644
--- gcc/fortran/openmp.c
+++ gcc/fortran/openmp.c
@@ -1387,7 +1387,8 @@ gfc_match_oacc_cache (void)
 {
   gfc_omp_clauses *c = gfc_get_omp_clauses ();
   match m = gfc_match_omp_variable_list (" (",
-&c->lists[OMP_LIST_CACHE], true);
+&c->lists[OMP_LIST_CACHE], true,
+NULL, NULL, true);
   if (m != MATCH_YES)
 {
   gfc_free_omp_clauses(c);
@@ -3107,6 +3108,7 @@ resolve_omp_clauses (gfc_code *code, locus *where,
  case OMP_LIST_MAP:
  case OMP_LIST_TO:
  case OMP_LIST_FROM:
+ case OMP_LIST_CACHE:
for (; n != NULL; n = n->next)
  {
if (n->expr)
@@ -3380,7 +3382,6 @@ resolve_omp_clauses (gfc_code *code, locus *where,
   n->sym->name, name, where);
  /* FALLTHRU */
  case OMP_LIST_DEVICE_RESIDENT:
- case OMP_LIST_CACHE:
check_symbol_not_pointer (n->sym, *where, name);
check_array_not_assumed (n->sym, *where, name);
break;
@@ -4597,13 +4598,6 @@ resolve_oacc_loop (gfc_code *code)
 }
 
 
-static void
-resolve_oacc_cache (gfc_code *code ATTRIBUTE_UNUSED)
-{
-  sorry ("Sorry, !$ACC cache unimplemented yet");
-}
-
-
 void
 gfc_resolve_oacc_declare (gfc_namespace *ns)
 {
@@ -4657,6 +4651,7 @@ gfc_resolve_oacc_directive (gfc_code *code, gfc_namespace 
*ns ATTRIBUTE_UNUSED)
 case EXEC_OACC_ENTER_DATA:
 case EXEC_OACC_EXIT_DATA:
 case EXEC_OACC_WAIT:
+case EXEC_OACC_CACHE:
   resolve_omp_clauses (code, &code->loc, code->ext.omp_clauses, NULL,
   true);
   break;
@@ -4665,9 +4660,6 @@ gfc_resolve_oacc_directive (gfc_code *code, gfc_namespace 
*ns ATTRIBUTE_UNUSED)
 case EXEC_OACC_LOOP:
   resolve_oacc_loop (code);
   break;
-case EXEC_OACC_CACHE:
-  resolve_oacc_cache (code);
- 

Re: [PING][PATCHv2, ARM, libgcc] New aeabi_idiv function for armv6-m

2015-10-27 Thread Andre Vieira

Ping.

BR,
Andre

On 13/10/15 18:01, Andre Vieira wrote:

This patch ports the aeabi_idiv routine from Linaro Cortex-Strings
(https://git.linaro.org/toolchain/cortex-strings.git), which was
contributed by ARM under Free BSD license.

The new aeabi_idiv routine is used to replace the one in
libgcc/config/arm/lib1funcs.S. This replacement happens within the
Thumb1 wrapper. The new routine is under LGPLv3 license.

The main advantage of this version is that it can improve the
performance of the aeabi_idiv function for Thumb1. This solution will
also increase the code size. So it will only be used if
__OPTIMIZE_SIZE__ is not defined.

Make check passed for armv6-m.

libgcc/ChangeLog:
2015-08-10  Hale Wang  
  Andre Vieira  

* config/arm/lib1funcs.S: Add new wrapper.





Re: [PATCH] Fix PR66313

2015-10-27 Thread Joseph Myers
On Tue, 27 Oct 2015, Richard Biener wrote:

> When factoring a*b + a*c to (b + c)*a we have to guard against the
> case of a == 0 as after the factoring b + c might overflow in that
> case.  Fixed by doing the addition in an unsigned type if required.

The same applies to a == -1 (consider b and c equal to -(INT_MIN/2), when 
a*b + a*c is INT_MIN but b+c overflows).  In that case, if you avoid b+c 
overflowing using an unsigned type, but convert back to signed for the 
multiplication, you get a spurious overflow from the multiplication 
instead.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH] PR/67682, break SLP groups up if only some elements match

2015-10-27 Thread Alan Lawrence

On 26/10/15 15:04, Richard Biener wrote:


apart from the fact that you'll post a new version you need to adjust GROUP_GAP.
You also seem to somewhat "confuse" "first I stmts" and "a group of
size I", those
are not the same when the group has haps.  I'd say "a group of size i" makes the
most sense here thus I suggest to adjust the function comment accordingly.


Ok, thanks for pointing this out. My objective had been to only split the store 
groups - which in BB vectorization, always seem to have gap 0 1 1  1. I 
didn't come up with a good scheme for how to split load groups, but it seemed 
that I didn't need to do anything there if I restricted to BB vectorization 
only. For example, consider (ignoring that we could multiply the first four 
elements by 1 and add 0 to the last four):


a[0] = b[I] + 1;
a[1] = b[J] + 2;
a[2] = b[K] + 3;
a[3] = b[L] + 4;
a[4] = b[M] * 3;
a[5] = b[N] * 4;
a[6] = b[O] * 5;
a[7] = b[P] * 7;


with constants I,J,K,L,M,N,O,P. Even with those being a sequence 2 0 1 1 3 0 2 1 
with overlaps and repetitions, this works fine for BB SLP (two subgroups of 
stores, *sharing* a load group but with different permutations). Likewise 0 1 2 
3 0 2 4 6.


For loop SLP, yes it looks like the load group needs to be split. So how; and 
what constraints to impose on those constants? (There is no single right answer!)


A fairly-strict scheme could be that (I,J,K,L) must be within a contiguous block 
of memory, that does not overlap with the contiguous block containing (M,N,O,P). 
Then, splitting the load group on the boundary seems reasonable, and updating 
the gaps as you suggest. However, when you say "the group first elements 
GROUP_GAP is the gap at the _end_ of the whole group" - the gap at the end is 
the gap that comes after the last element and up towhat?


Say I...P are consecutive, the input would have gaps 0 1 1 1 1 1 1 1. If we 
split the load group, we would want subgroups with gaps 0 1 1 1 and 0 1 1 1? 
(IIUC, you suggest  and 0111?)


If they are disjoint sets, but overlapping blocks of memory, say 0 2 4 6 1 3 5 
7...then do we create two load groups, with gap 0 2 2 2 and 0 2 2 2 again? Does 
something record that the load groups access overlapping areas, and record the 
offset against each other?


If there are repeated elements (as in the BB SLP case mentioned above), I'm not 
clear how we can split this effectively...so may have to rule out that case. 
(Moreover, if we are considering hybrid SLP, it may not be clear what the loop 
accesses are, we may be presented only with the SLP accesses. Do we necessarily 
want to pull those out of a load group?)


So I expect I may resolve some of these issues as I progress, but I'm curious as 
to whether (and why) the patch was really broken (wrt gaps) as it stood...


Thanks,
Alan



Re: PING: [PATCH] PR target/67215: -fno-plt needs improvements for x86

2015-10-27 Thread Jeff Law

On 10/27/2015 09:42 AM, Ramana Radhakrishnan wrote:



On 27/10/15 14:50, H.J. Lu wrote:

On Tue, Oct 27, 2015 at 7:34 AM, Ramana Radhakrishnan
 wrote:




OK, then it's fairly x86-64 specific optimization, because we can't do "call 
*mem" in
aarch64 and some other targets.


It is a fairly x86_64 specific optimization and doesn't apply to AArch64.

The question really is what impact does removing the generic code handling have 
on aarch64 - is it a no-op or not for the existing -fno-plt implementation in 
the AArch64 backend ? The only case that is of interest is the bit below in 
calls.c and it looks like that may well be redundant with the logic in the 
backend already, but I have not done the full analysis to convince myself that 
the code in the backend is sufficient.

-  && (!flag_plt
-  || lookup_attribute ("noplt", DECL_ATTRIBUTES (fndecl_or_type)))
-  && !targetm.binds_local_p (fndecl_or_type))



-fno-plt is a backend specific optimization and should be handled
in backend.



HJ, Thanks for committing the change even when we were discussing the change

This is what I'm primarily concerned about.

Bernd's message was pretty clear in my mind:

https://gcc.gnu.org/ml/gcc-patches/2015-10/msg02861.html

It was conditional approval based on no other target using -fno-plt and 
agreement from the x86 maintainers.


HJ replied that aarch64 uses -fno-plt:


https://gcc.gnu.org/ml/gcc-patches/2015-10/msg02865.html


And then apparently HJ committed the patch.


commit 47b727e5ec3f6f4f0a30ee899adce80185ad6999
Author: hjl 
Date:   Tue Oct 27 14:29:31 2015 +

When reviewers conditionally approve a patch, the conditions need to be 
satisfied before a patch can be committed.  Ignoring the conditions 
seems like a significant breech of trust to me.


HJ, why did you commit the patch given it didn't meet the conditions 
Bernd set forth for approval?


Jeff



Re: [c++-delayed-folding] Introduce convert_to_pointer_nofold

2015-10-27 Thread Marek Polacek
On Sun, Oct 25, 2015 at 04:49:08AM -1000, Jason Merrill wrote:
> On 10/19/2015 05:33 AM, Marek Polacek wrote:
> >+if (fold_p)
> >+  expr = fold_build1_loc (loc, NOP_EXPR, totype, expr);
> >+else
> >+  expr = build1_loc (loc, NOP_EXPR, totype, expr);
> 
> Rather than duplicate code like this everywhere, maybe we should introduce a
> maybe_fold_build1_loc macro that takes fold_p as an argument.

Good point.  Like the following?

Testing in progress.

diff --git gcc/convert.c gcc/convert.c
index 1ce8099..4a6a70d 100644
--- gcc/convert.c
+++ gcc/convert.c
@@ -37,12 +37,17 @@ along with GCC; see the file COPYING3.  If not see
 #include "builtins.h"
 #include "ubsan.h"
 
+#define maybe_fold_build1_loc(FOLD_P, LOC, CODE, TYPE, EXPR) \
+  ((FOLD_P) ? fold_build1_loc (LOC, CODE, TYPE, EXPR)   \
+   : build1_loc (LOC, CODE, TYPE, EXPR))
+
 /* Convert EXPR to some pointer or reference type TYPE.
EXPR must be pointer, reference, integer, enumeral, or literal zero;
-   in other cases error is called.  */
+   in other cases error is called.  If FOLD_P is true, try to fold the
+   expression.  */
 
-tree
-convert_to_pointer (tree type, tree expr)
+static tree
+convert_to_pointer_1 (tree type, tree expr, bool fold_p)
 {
   location_t loc = EXPR_LOCATION (expr);
   if (TREE_TYPE (expr) == type)
@@ -59,9 +64,10 @@ convert_to_pointer (tree type, tree expr)
addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (expr)));
 
if (to_as == from_as)
- return fold_build1_loc (loc, NOP_EXPR, type, expr);
+ return maybe_fold_build1_loc (fold_p, loc, NOP_EXPR, type, expr);
else
- return fold_build1_loc (loc, ADDR_SPACE_CONVERT_EXPR, type, expr);
+ return maybe_fold_build1_loc (fold_p, loc, ADDR_SPACE_CONVERT_EXPR,
+   type, expr);
   }
 
 case INTEGER_TYPE:
@@ -75,20 +81,37 @@ convert_to_pointer (tree type, tree expr)
unsigned int pprec = TYPE_PRECISION (type);
unsigned int eprec = TYPE_PRECISION (TREE_TYPE (expr));
 
-   if (eprec != pprec)
- expr = fold_build1_loc (loc, NOP_EXPR,
- lang_hooks.types.type_for_size (pprec, 0),
- expr);
+   if (eprec != pprec)
+ expr
+   = maybe_fold_build1_loc (fold_p, loc, NOP_EXPR,
+lang_hooks.types.type_for_size (pprec, 0),
+expr);
   }
-
-  return fold_build1_loc (loc, CONVERT_EXPR, type, expr);
+  return maybe_fold_build1_loc (fold_p, loc, CONVERT_EXPR, type, expr);
 
 default:
   error ("cannot convert to a pointer type");
-  return convert_to_pointer (type, integer_zero_node);
+  return convert_to_pointer_1 (type, integer_zero_node, fold_p);
 }
 }
 
+/* A wrapper around convert_to_pointer_1 that always folds the
+   expression.  */
+
+tree
+convert_to_pointer (tree type, tree expr)
+{
+  return convert_to_pointer_1 (type, expr, true);
+}
+
+/* A wrapper around convert_to_pointer_1 that only folds the
+   expression if it is CONSTANT_CLASS_P.  */
+
+tree
+convert_to_pointer_nofold (tree type, tree expr)
+{
+  return convert_to_pointer_1 (type, expr, CONSTANT_CLASS_P (expr));
+}
 
 /* Convert EXPR to some floating-point type TYPE.
 
diff --git gcc/convert.h gcc/convert.h
index ac78f95..24fa6bf 100644
--- gcc/convert.h
+++ gcc/convert.h
@@ -23,6 +23,7 @@ along with GCC; see the file COPYING3.  If not see
 extern tree convert_to_integer (tree, tree);
 extern tree convert_to_integer_nofold (tree, tree);
 extern tree convert_to_pointer (tree, tree);
+extern tree convert_to_pointer_nofold (tree, tree);
 extern tree convert_to_real (tree, tree);
 extern tree convert_to_fixed (tree, tree);
 extern tree convert_to_complex (tree, tree);
diff --git gcc/cp/cvt.c gcc/cp/cvt.c
index 0a30270..cb73bb7 100644
--- gcc/cp/cvt.c
+++ gcc/cp/cvt.c
@@ -241,7 +241,7 @@ cp_convert_to_pointer (tree type, tree expr, tsubst_flags_t 
complain)
   gcc_assert (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (expr)))
  == GET_MODE_SIZE (TYPE_MODE (type)));
 
-  return convert_to_pointer (type, expr);
+  return convert_to_pointer_nofold (type, expr);
 }
 
   if (type_unknown_p (expr))

Marek


  1   2   >