[PATCH] Refactor vectorizer cost model

2015-05-28 Thread Richard Biener

This refactors the vectorizer cost model so that it is called once
everything is ready, avoiding some fixups.  It also fixes the cost
computation for SLP reductions.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2015-05-28  Richard Biener  

* tree-vectorizer.h (struct _slp_instance): Remove body_cost_vec
member.
(SLP_INSTANCE_BODY_COST_VEC): Remove.
(vect_update_slp_costs_according_to_vf): Likewise.
(vect_slp_analyze_operations): Update prototype.
* tree-vect-loop.c (vect_analyze_loop_2): Remove call to
vect_update_slp_costs_according_to_vf, adjust.
* tree-vect-slp.c (vect_free_slp_instance): Adjust.
(vect_analyze_slp_cost_1): Likewise.
(vect_analyze_slp_cost): Likewise.  Properly deal with
widening reduction ops.  Commit body costs.
(vect_analyze_slp_instance): Adjust.  Do not analyze SLP
cost for loops from here.
(vect_slp_analyze_operations): But do it from here when
the vectorization factor is known and stmts are analyzed.
(vect_bb_vectorization_profitable_p): Simplify.
(vect_slp_analyze_bb_1): Do not compute SLP cost here.
(vect_update_slp_costs_according_to_vf): Remove.


Index: gcc/tree-vect-loop.c
===
--- gcc/tree-vect-loop.c(revision 223743)
+++ gcc/tree-vect-loop.c(working copy)
@@ -1814,15 +1855,12 @@ vect_analyze_loop_2 (loop_vec_info loop_
  /* Update the vectorization factor based on the SLP decision.  */
  vect_update_vf_for_slp (loop_vinfo);
 
- /* Once VF is set, SLP costs should be updated since the number of
-created vector stmts depends on VF.  */
- vect_update_slp_costs_according_to_vf (loop_vinfo);
-
  /* Analyze operations in the SLP instances.  Note this may
 remove unsupported SLP instances which makes the above
 SLP kind detection invalid.  */
  unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
- vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
+ vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
+  LOOP_VINFO_TARGET_COST_DATA 
(loop_vinfo));
  if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
return false;
}
Index: gcc/tree-vect-slp.c
===
--- gcc/tree-vect-slp.c (revision 223743)
+++ gcc/tree-vect-slp.c (working copy)
@@ -130,7 +130,6 @@ vect_free_slp_instance (slp_instance ins
 {
   vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
   SLP_INSTANCE_LOADS (instance).release ();
-  SLP_INSTANCE_BODY_COST_VEC (instance).release ();
   free (instance);
 }
 
@@ -1546,13 +1545,11 @@ vect_find_last_scalar_stmt_in_slp (slp_t
 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */
 
 static void
-vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
-slp_instance instance, slp_tree node,
+vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
 stmt_vector_for_cost *prologue_cost_vec,
+stmt_vector_for_cost *body_cost_vec,
 unsigned ncopies_for_cost)
 {
-  stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);
-
   unsigned i;
   slp_tree child;
   gimple stmt, s;
@@ -1563,9 +1560,8 @@ vect_analyze_slp_cost_1 (loop_vec_info l
   /* Recurse down the SLP tree.  */
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
 if (child)
-  vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
-  instance, child, prologue_cost_vec,
-  ncopies_for_cost);
+  vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
+  body_cost_vec, ncopies_for_cost);
 
   /* Look at the first scalar stmt to determine the cost.  */
   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
@@ -1622,7 +1618,8 @@ vect_analyze_slp_cost_1 (loop_vec_info l
   enum vect_def_type dt;
   if (!op || op == lhs)
continue;
-  if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
+  if (vect_is_simple_use (op, NULL, STMT_VINFO_LOOP_VINFO (stmt_info),
+ STMT_VINFO_BB_VINFO (stmt_info),
  &def_stmt, &def, &dt))
{
  /* Without looking at the actual initializer a vector of
@@ -1642,8 +1639,7 @@ vect_analyze_slp_cost_1 (loop_vec_info l
 /* Compute the cost for the SLP instance INSTANCE.  */
 
 static void
-vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
-  slp_instance instance, unsigned nunits)
+vect_analyze_slp_cost (slp_instance instance, void *data)
 {
   stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
   unsigned ncopies

Commit: RX: Better use of PUSHM and POPM

2015-05-28 Thread Nick Clifton
Hi Guys,

  I am applying the patch below to enhance the RX backend so that it
  will push and pop multiple groups of registers using the PUSHM and
  POPM instructions, thus reducing code size and increasing
  performance.

Cheers
  Nick

gcc/ChangeLog
2015-05-28  Nick Clifton  

* config/rx/rx.c (push_regs): New function.  Extracts code from...
(rx_expand_prologue): ... here.  Use push_regs to push even small
spans of registers.
(pop_regs): New function.
(rx_expand_epilogue):  Use pop_regs to pop even small spans of
registers.

Index: gcc/config/rx/rx.c
===
--- gcc/config/rx/rx.c  (revision 223737)
+++ gcc/config/rx/rx.c  (working copy)
@@ -1567,6 +1567,10 @@
  has specified --fixed- on the command line and in such
  circumstances we do not want to touch the fixed registers at all.
 
+ Note also that the code in the prologue/epilogue handlers will
+ automatically merge multiple PUSHes of adjacent registers into a single
+ PUSHM.
+
  FIXME: Is it worth improving this heuristic ?  */
   pushed_mask = (-1 << low) & ~(-1 << (high + 1));
   unneeded_pushes = (pushed_mask & (~ save_mask)) & pushed_mask;
@@ -1716,6 +1720,19 @@
   return;
 }
 
+static void
+push_regs (unsigned int high, unsigned int low)
+{
+  rtx insn;
+
+  if (low == high)
+insn = emit_insn (gen_stack_push (gen_rtx_REG (SImode, low)));
+  else
+insn = emit_insn (gen_stack_pushm (GEN_INT (((high - low) + 1) * 
UNITS_PER_WORD),
+  gen_rx_store_vector (low, high)));
+  mark_frame_related (insn);
+}
+
 void
 rx_expand_prologue (void)
 {
@@ -1725,7 +1742,6 @@
   unsigned int low;
   unsigned int high;
   unsigned int reg;
-  rtx insn;
 
   /* Naked functions use their own, programmer provided prologues.  */
   if (is_naked_func (NULL_TREE))
@@ -1735,7 +1751,7 @@
 
   if (flag_stack_usage_info)
 current_function_static_stack_size = frame_size + stack_size;
-
+  
   /* If we use any of the callee-saved registers, save them now.  */
   if (mask)
 {
@@ -1743,20 +1759,25 @@
   for (reg = CC_REGNUM; reg --;)
if (mask & (1 << reg))
  {
-   insn = emit_insn (gen_stack_push (gen_rtx_REG (SImode, reg)));
-   mark_frame_related (insn);
+   low = high = reg;
+
+   /* Look for a span of registers.
+  Note - we do not have to worry about -Os and whether
+  it is better to use a single, longer PUSHM as
+  rx_get_stack_layout has already done that for us.  */
+   while (reg-- > 0)
+ if ((mask & (1 << reg)) == 0)
+   break;
+ else
+   --low;
+
+   push_regs (high, low);
+   if (reg == (unsigned) -1)
+ break;
  }
 }
   else if (low)
-{
-  if (high == low)
-   insn = emit_insn (gen_stack_push (gen_rtx_REG (SImode, low)));
-  else
-   insn = emit_insn (gen_stack_pushm (GEN_INT (((high - low) + 1)
-   * UNITS_PER_WORD),
-  gen_rx_store_vector (low, high)));
-  mark_frame_related (insn);
-}
+push_regs (high, low);
 
   if (MUST_SAVE_ACC_REGISTER)
 {
@@ -2031,6 +2052,16 @@
  && low == 0);
 }
 
+static void
+pop_regs (unsigned int high, unsigned int low)
+{
+  if (high == low)
+emit_insn (gen_stack_pop (gen_rtx_REG (SImode, low)));
+  else
+emit_insn (gen_stack_popm (GEN_INT (((high - low) + 1) * UNITS_PER_WORD),
+  gen_rx_popm_vector (low, high)));
+}
+
 void
 rx_expand_epilogue (bool is_sibcall)
 {
@@ -2143,16 +2174,16 @@
{
  for (reg = 0; reg < CC_REGNUM; reg ++)
if (register_mask & (1 << reg))
- emit_insn (gen_stack_pop (gen_rtx_REG (SImode, reg)));
+ {
+   low = high = reg;
+   while (register_mask & (1 << high))
+ high ++;
+   pop_regs (high - 1, low);
+   reg = high;
+ }
}
   else if (low)
-   {
- if (high == low)
-   emit_insn (gen_stack_pop (gen_rtx_REG (SImode, low)));
- else
-   emit_insn (gen_stack_popm (GEN_INT (regs_size),
-  gen_rx_popm_vector (low, high)));
-   }
+   pop_regs (high, low);
 
   if (is_fast_interrupt_func (NULL_TREE))
{


Re: [debug-early] fix problem with template parameter packs

2015-05-28 Thread Richard Biener
On Wed, May 27, 2015 at 9:34 PM, Jason Merrill  wrote:
> OK, I see the issue.  We're calling debug_abstract_function to build debug
> info for the abstract instance of a function that we already built from
> dwarf2out_early_global_decl.
>
> It occurs to me that the early-dwarf work should make
> debug_abstract_function and most of the DECL_ABSTRACT handling obsolete.
> All we need to do is set DW_AT_inline during early debug and update it
> during late debug if the function is inlined.

Yes, that was my idea as well.  The early dwarf _is_ the "abstract"
variant after all (until we annotate it further without going through
another indirection like I did for LTO).

Richard.

> Jason


Re: fix pr65369.c testcase

2015-05-28 Thread Richard Biener
On Thu, May 28, 2015 at 7:11 AM, DJ Delorie  wrote:
>
> Copied the way other tests get uint32_t.  Ok?

Ok.

> * gcc.c-torture/execute/pr65369.c: Don't assume int is 32 bits.
>
> Index: gcc.c-torture/execute/pr65369.c
> ===
> --- gcc.c-torture/execute/pr65369.c (revision 223737)
> +++ gcc.c-torture/execute/pr65369.c (working copy)
> @@ -1,7 +1,8 @@
>  /* PR tree-optimization/65369 */
> +#include 
>
>  static const char data[] =
>"12345678901234567890123456789012345678901234567890"
>"123456789012345678901234567890";
>
>  __attribute__ ((noinline))
> @@ -11,13 +12,13 @@ static void foo (const unsigned int *buf
>  __builtin_abort ();
>  }
>
>  __attribute__ ((noinline))
>  static void bar (const unsigned char *block)
>  {
> -  unsigned int buf[16];
> +  uint32_t buf[16];
>__builtin_memcpy (buf +  0, block +  0, 4);
>__builtin_memcpy (buf +  1, block +  4, 4);
>__builtin_memcpy (buf +  2, block +  8, 4);
>__builtin_memcpy (buf +  3, block + 12, 4);
>__builtin_memcpy (buf +  4, block + 16, 4);
>__builtin_memcpy (buf +  5, block + 20, 4);


Re: [patch] libjava signal handling for FreeBSD (amd64/i386)

2015-05-28 Thread Andrew Haley
On 27/05/15 20:53, Andreas Tobler wrote:
> Is this ok for trunk?

Excellent, thanks.

Andrew.



[Ada] Speed improvements for controlled types

2015-05-28 Thread Arnaud Charlet
This patch changes the implementation of controlled types so that in simple
cases, they are just as efficient as noncontrolled types where initialization
and cleanup are done by hand.

Tested on x86_64-pc-linux-gnu, committed on trunk

2015-05-27  Bob Duff  

* exp_ch3.adb (Build_Array_Init_Proc, Build_Record_Init_Proc):
Inline init_procs when the type has controlled parts. Remove
obsolete comments about those init_procs -- init_procs for
such types are no longer complex. A typical init_proc just
initializes the 'Tag field, and calls the parent init_proc
(e.g. for Limited_Controlled), which calls the grandparent
(for Root_Controlled), which does nothing. This all boils down
to one instruction when inlined.
* exp_ch7.adb (Create_Finalizer): Inline the finalizer.

Index: exp_ch7.adb
===
--- exp_ch7.adb (revision 223752)
+++ exp_ch7.adb (working copy)
@@ -1440,6 +1440,13 @@
 --  resides, there is no need for elaboration checks.
 
 Set_Kill_Elaboration_Checks (Fin_Id);
+
+--  Inlining the finalizer produces a substantial speedup at -O2.
+--  It is inlined by default at -O3. Either way, it is called
+--  exactly twice (once on the normal path, and once for
+--  exceptions/abort), so this won't bloat the code too much.
+
+Set_Is_Inlined  (Fin_Id);
  end if;
 
  --  Step 2: Creation of the finalizer specification
Index: exp_ch3.adb
===
--- exp_ch3.adb (revision 223754)
+++ exp_ch3.adb (working copy)
@@ -311,7 +311,7 @@
--  Predefined_Primitive_Bodies.
 
function Has_New_Non_Standard_Rep (T : Entity_Id) return Boolean;
-   --  returns True if there are representation clauses for type T that are not
+   --  Returns True if there are representation clauses for type T that are not
--  inherited. If the result is false, the init_proc and the discriminant
--  checking functions of the parent can be reused by a derived type.
 
@@ -761,14 +761,12 @@
 Set_Debug_Info_Off (Proc_Id);
  end if;
 
- --  Set inlined unless controlled stuff or tasks around, in which
- --  case we do not want to inline, because nested stuff may cause
- --  difficulties in inter-unit inlining, and furthermore there is
- --  in any case no point in inlining such complex init procs.
+ --  Set inlined unless tasks are around, in which case we do not
+ --  want to inline, because nested stuff may cause difficulties in
+ --  inter-unit inlining, and furthermore there is in any case no
+ --  point in inlining such complex init procs.
 
- if not Has_Task (Proc_Id)
-   and then not Needs_Finalization (Proc_Id)
- then
+ if not Has_Task (Proc_Id) then
 Set_Is_Inlined (Proc_Id);
  end if;
 
@@ -3619,14 +3617,10 @@
  --  The initialization of protected records is not worth inlining.
  --  In addition, when compiled for another unit for inlining purposes,
  --  it may make reference to entities that have not been elaborated
- --  yet. The initialization of controlled records contains a nested
- --  clean-up procedure that makes it impractical to inline as well,
- --  and leads to undefined symbols if inlined in a different unit.
- --  Similar considerations apply to task types.
+ --  yet. Similar considerations apply to task types.
 
  if not Is_Concurrent_Type (Rec_Type)
and then not Has_Task (Rec_Type)
-   and then not Needs_Finalization (Rec_Type)
  then
 Set_Is_Inlined  (Proc_Id);
  end if;


[Ada] Visibility error of selected component in instance body

2015-05-28 Thread Arnaud Charlet
This patch fixes a spurious visibility error on a selected component in an
instance body, when the type of the prefix of the selected component is an
actual of the instance, and the desired component is inherited through
one or more derivations.

The package derived.ads below must compile quietly:

--
package AST is

   type AST_Node_Type is abstract tagged private;
   type AST_Node_Access is access AST_Node_Type;
   type AST_Node is access all AST_Node_Type'Class;

   procedure Compute_Indent_Level (Node : access AST_Node_Type) is abstract;

private

   type AST_Node_Type is abstract tagged record
  Indent_Level : Natural;
   end record;

end AST;
---
generic
   type Node_Type is abstract new AST_Node_Type with private;
   type Node is access all Node_Type'Class;
package AST.List is

   type List_Type is new AST_Node_Type with record
  N : Node;
   end record;

   overriding
   procedure Compute_Indent_Level (Node : access List_Type);

end AST.List;
package body AST.List is

   overriding
   procedure Compute_Indent_Level (Node : access List_Type) is
   begin
  Node.N.Indent_Level := Node.Indent_Level;
   end Compute_Indent_Level;

end AST.List;
---
with AST; use AST;
with AST.List;

package Derived is

   type Derived_Type is abstract new AST_Node_Type with null record;
   type Derived is access all Derived_Type'Class;

   package Lists is new AST.List
 (Node_Type => Derived_Type,
  Node  => Derived);

end Derived;

Tested on x86_64-pc-linux-gnu, committed on trunk

2015-05-27  Ed Schonberg  

* sem_ch4.adb (Analyze_Selected_Component): If the type to use
is a derived type and is a generic actual, the selected component
appears within an instance body, and the check over the type
has failed, examine ancestor types for the desired component.
(Find_Component_In_Instance): If record type is a derived type,
examine all ancestors in order to locate desired component.

Index: sem_ch4.adb
===
--- sem_ch4.adb (revision 223754)
+++ sem_ch4.adb (working copy)
@@ -4102,7 +4102,8 @@
   --  searches have failed. If a match is found, the Etype of both N and
   --  Sel are set from this component, and the entity of Sel is set to
   --  reference this component. If no match is found, Entity (Sel) remains
-  --  unset.
+  --  unset. For a derived type that is an actual of the instance, the
+  --  desired component may be found in any ancestor.
 
   function Has_Mode_Conformant_Spec (Comp : Entity_Id) return Boolean;
   --  It is known that the parent of N denotes a subprogram call. Comp
@@ -4117,18 +4118,36 @@
 
   procedure Find_Component_In_Instance (Rec : Entity_Id) is
  Comp : Entity_Id;
+ Typ  : Entity_Id;
 
   begin
- Comp := First_Component (Rec);
- while Present (Comp) loop
-if Chars (Comp) = Chars (Sel) then
-   Set_Entity_With_Checks (Sel, Comp);
-   Set_Etype (Sel, Etype (Comp));
-   Set_Etype (N,   Etype (Comp));
+ Typ := Rec;
+ while Present (Typ) loop
+Comp := First_Component (Typ);
+while Present (Comp) loop
+   if Chars (Comp) = Chars (Sel) then
+  Set_Entity_With_Checks (Sel, Comp);
+  Set_Etype (Sel, Etype (Comp));
+  Set_Etype (N,   Etype (Comp));
+  return;
+   end if;
+
+   Next_Component (Comp);
+end loop;
+
+--  If not found, the component may be declared in the parent
+--  type or its full view, if any.
+
+if Is_Derived_Type (Typ) then
+   Typ := Etype (Typ);
+
+   if Is_Private_Type (Typ) then
+  Typ := Full_View (Typ);
+   end if;
+
+else
return;
 end if;
-
-Next_Component (Comp);
  end loop;
 
  --  If we fall through, no match, so no changes made
@@ -4789,6 +4808,18 @@
  Par := Etype (Par);
   end loop;
 
+   --  Another special case: the type is an extension of a private
+   --  type T, is an actual in an instance, and we are in the body
+   --  of the instance, so the generic body had a full view of the
+   --  type declaration for T or of some ancestor that defines the
+   --  component in question.
+
+   elsif Is_Derived_Type (Type_To_Use)
+ and then Used_As_Generic_Actual (Type_To_Use)
+ and then In_Instance_Body
+   then
+  Find_Component_In_Instance (Parent_Subtype (Type_To_Use));
+
--  In ASIS mode the generic parent type may be absent. Examine
--  the parent type directly for a component that may have been
--  visible in

[PATCH] auto-wipe dump files, part1, manual stuff

2015-05-28 Thread Bernhard Reutner-Fischer
On top of this patch you would have to run

git grep -l -E "(cleanup-.*-dump|cleanup-saved-temps)" \
  | egrep -v "(ChangeLog|/lib/)" \
  | sed -e "s|[^/]*$||" | sort | uniq \
  | while read d;
do
  find $d -type f -exec \
sed -i -e "/cleanup-[^-]*[-]*dump/d;/cleanup-saved-temps/d" {} +
done
---

I had to add dg-keep-saved-temps for the following two testcases:
g++.dg/pch/pch.C gcc.dg/pch/save-temps-1.c

The following testcases had
/* { dg-final { cleanup-saved-temps "ivopts" } } */
or, respectively, cleanup-saved-temps "pr43597" or "[dt][fi]mode_off",
which does nothing AFAICT and thus will be removed by the above sed.
gcc.target/arm/ivopts-2.c
gcc.target/arm/ivopts-3.c
gcc.target/arm/ivopts-4.c
gcc.target/arm/ivopts-5.c
gcc.target/arm/ivopts.c
gcc.target/arm/pr43597.c
gcc.target/powerpc/dfmode_off.c
gcc.target/powerpc/dimode_off.c
gcc.target/powerpc/tfmode_off.c
gcc.target/powerpc/timode_off.c

I do expect fallout for a couple of days when people push new testcases
and have not yet picked up the removal of the procs in their test-runs.

Since you, Mike, OKed the patch in gcc-5 stage-4 for the next stage1, I will
commit this patch (plus the sed in the same revision) in a couple of
hours.

gcc/testsuite/ChangeLog

2015-05-28  Bernhard Reutner-Fischer  

* lib/gcc-dg.exp (cleanup-ipa-dump, cleanup-rtl-dump,
cleanup-tree-dump, cleanup-dump, cleanup-saved-temps): Remove.
Adjust all callers.
(schedule-cleanups, dg-keep-saved-temps): New proc.
(gcc-dg-test-1): Schedule cleanups.
* lib/profopt.exp (profopt-execute): Likewise.
* g++.dg/cdce3.C: Adjust expected line numbers.
* gcc.dg/cdce1.c: Likewise.
* gcc.dg/cdce2.c: Likewise.
* gcc.dg/strlenopt-22.c: Fix comment delimiter.
* gcc.dg/strlenopt-24.c: Likewise.
* gcc.dg/tree-ssa/vrp26.c: Likewise.
* gcc.dg/tree-ssa/vrp28.c: Likewise.
* obj-c++.dg/encode-2.mm: Likewise.

libgomp/ChangeLog

2015-05-28  Bernhard Reutner-Fischer  

* testsuite/libgomp.graphite/bounds.c: Adjust for
cleanup-tree-dump removal.
* testsuite/libgomp.graphite/force-parallel-1.c: Likewise.
* testsuite/libgomp.graphite/force-parallel-2.c: Likewise.
* testsuite/libgomp.graphite/force-parallel-3.c: Likewise.
* testsuite/libgomp.graphite/force-parallel-4.c: Likewise.
* testsuite/libgomp.graphite/force-parallel-5.c: Likewise.
* testsuite/libgomp.graphite/force-parallel-6.c: Likewise.
* testsuite/libgomp.graphite/force-parallel-7.c: Likewise.
* testsuite/libgomp.graphite/force-parallel-8.c: Likewise.
* testsuite/libgomp.graphite/force-parallel-9.c: Likewise.
* testsuite/libgomp.graphite/pr41118.c: Likewise.


gcc/ChangeLog

2015-05-28  Bernhard Reutner-Fischer  

* config/arm/neon-testgen.ml (emit_epilogue): Remove manual call
to cleanup-saved-temps.

gcc/doc/ChangeLog

2015-05-28  Bernhard Reutner-Fischer  

* doc/sourcebuild.texi (Clean up generated test files): Expand
introduction.
(cleanup-ipa-dump, cleanup-rtl-dump, cleanup-tree-dump,
cleanup-saved-temps): Remove.
(dg-keep-saved-temps): Document new proc.


 gcc/config/arm/neon-testgen.ml  |1 -
 gcc/doc/sourcebuild.texi|   30 +++---
 gcc/testsuite/g++.dg/cdce3.C|5 +-
 gcc/testsuite/g++.dg/pch/pch.C  |3 +-
 gcc/testsuite/gcc.dg/cdce1.c|3 +-
 gcc/testsuite/gcc.dg/cdce2.c|3 +-
 gcc/testsuite/gcc.dg/pch/save-temps-1.c |3 +-
 gcc/testsuite/gcc.dg/strlenopt-22.c |3 +-
 gcc/testsuite/gcc.dg/strlenopt-24.c |3 +-
 gcc/testsuite/gcc.dg/tree-ssa/vrp26.c   |3 +-
 gcc/testsuite/gcc.dg/tree-ssa/vrp28.c   |3 +-
 gcc/testsuite/lib/gcc-dg.exp|  170 +++
 gcc/testsuite/lib/profopt.exp   |3 +
 gcc/testsuite/obj-c++.dg/encode-2.mm|3 +-
 14 files changed, 157 insertions(+), 79 deletions(-)

diff --git a/gcc/config/arm/neon-testgen.ml b/gcc/config/arm/neon-testgen.ml
index 0a2a442..3164ab7 100644
--- a/gcc/config/arm/neon-testgen.ml
+++ b/gcc/config/arm/neon-testgen.ml
@@ -138,7 +138,6 @@ let emit_epilogue chan features regexps =
  else
()
 );
-Printf.fprintf chan "/* { dg-final { cleanup-saved-temps } } */\n"
 
 (* Check a list of C types to determine which ones are pointers and which
ones are const.  *)
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index c6ef40e..cb41b01 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -2210,13 +2210,17 @@ Check branch and/or call counts, in addition to line 
counts, in
 
 @subsubsection Clean up generated test files
 
+Usually the test-framework removes files that were generated during
+testing. If a testcase, for example, uses any dumping mechanism to
+inspect a passes dump file, the testsuite recognized the dump option
+passed to the tool and 

[PATCH v3] libiberty: cleanup Makefile.in

2015-05-28 Thread Bernhard Reutner-Fischer
* configure.ac (TARGETLIB_PIC, TARGETLIB_NOASAN): New variables.
* configure: Regenerate.
* maint-tool: Refactor pic/ and noasan/ handling.
* Makefile.in: Likewise. Regenerate dependencies.

---
The below does the same but attempts to be limited to what POSIX
requires a make(1) to provide. Seems to compile fine with gnu-make and
bmake.

Changes v2 -> v3:
- drop unneeded SUB_CFLAGS
- add dependencies on %.c for stamp-pic-ofiles and stamp-noasan-ofiles

Ok for trunk?

Signed-off-by: Bernhard Reutner-Fischer 
---
 libiberty/Makefile.in  | 2408 +---
 libiberty/configure|8 +
 libiberty/configure.ac |6 +
 libiberty/maint-tool   |   33 +-
 4 files changed, 1487 insertions(+), 968 deletions(-)

diff --git a/libiberty/Makefile.in b/libiberty/Makefile.in
index f06cc69..ca4e75f 100644
--- a/libiberty/Makefile.in
+++ b/libiberty/Makefile.in
@@ -68,6 +68,7 @@ MAKEOVERRIDES =
 
 TARGETLIB = ./libiberty.a
 TESTLIB = ./testlib.a
+TARGETLIBS = $(TARGETLIB) @TARGETLIB_PIC@ @TARGETLIB_NOASAN@
 
 LIBOBJS = @LIBOBJS@
 
@@ -102,7 +103,7 @@ FLAGS_TO_PASS = \
 SUBDIRS = testsuite
 
 # FIXME: add @BUILD_INFO@ once we're sure it works for everyone.
-all: stamp-picdir stamp-noasandir $(TARGETLIB) required-list all-subdir
+all: $(TARGETLIBS) required-list all-subdir
@: $(MAKE) ; $(MULTIDO) $(FLAGS_TO_PASS) multi-do DO=all
 
 .PHONY: check installcheck
@@ -244,24 +245,24 @@ INSTALLED_HEADERS =   
  \
$(INCDIR)/timeval-utils.h
 
 $(TARGETLIB): $(REQUIRED_OFILES) $(EXTRA_OFILES) $(LIBOBJS)
-   -rm -f $(TARGETLIB) pic/$(TARGETLIB) noasan/$(TARGETLIB)
-   $(AR) $(AR_FLAGS) $(TARGETLIB) \
+   -rm -f $@
+   $(AR) $(AR_FLAGS) $@ \
  $(REQUIRED_OFILES) $(EXTRA_OFILES) $(LIBOBJS)
-   $(RANLIB) $(TARGETLIB)
-   if [ x"$(PICFLAG)" != x ]; then \
- cd pic; \
- $(AR) $(AR_FLAGS) $(TARGETLIB) \
-   $(REQUIRED_OFILES) $(EXTRA_OFILES) $(LIBOBJS); \
- $(RANLIB) $(TARGETLIB); \
- cd ..; \
-   else true; fi; \
-   if [ x"$(NOASANFLAG)" != x ]; then \
- cd noasan; \
- $(AR) $(AR_FLAGS) $(TARGETLIB) \
-   $(REQUIRED_OFILES) $(EXTRA_OFILES) $(LIBOBJS); \
- $(RANLIB) $(TARGETLIB); \
- cd ..; \
-   else true; fi
+   $(RANLIB) $@
+
+TARGETLIB_PIC_OFILES = `echo $(REQUIRED_OFILES) $(EXTRA_OFILES) $(LIBOBJS) | \
+sed -e 's,[^/ ]*/,./pic/,g';`
+./pic/libiberty.a: stamp-pic-ofiles
+   -rm -f $@
+   $(AR) $(AR_FLAGS) $@ $(TARGETLIB_PIC_OFILES)
+   $(RANLIB) $@
+
+TARGETLIB_NOASAN_OFILES =`echo $(REQUIRED_OFILES) $(EXTRA_OFILES) $(LIBOBJS) | 
\
+sed -e 's,[^/ ]*/,./noasan/,g';`
+./noasan/libiberty.a: stamp-noasan-ofiles
+   -rm -f $@
+   $(AR) $(AR_FLAGS) $@ $(TARGETLIB_NOASAN_OFILES)
+   $(RANLIB) $@
 
 $(TESTLIB): $(REQUIRED_OFILES) $(CONFIGURED_OFILES)
-rm -f $(TESTLIB)
@@ -393,17 +394,15 @@ install_to_tooldir: all
 required-list: Makefile
echo $(REQUIRED_OFILES) > required-list
 
-stamp-picdir:
-   if [ x"$(PICFLAG)" != x ] && [ ! -d pic ]; then \
- mkdir pic; \
-   else true; fi
-   touch stamp-picdir
+stamp-pic-ofiles: $(CFILES:%=$(srcdir)/%)
+   [ -d pic ] && : || mkdir pic
+   $(MAKE) $(FLAGS_TO_PASS) $(TARGETLIB_PIC_OFILES)
+   touch $@
 
-stamp-noasandir:
-   if [ x"$(NOASANFLAG)" != x ] && [ ! -d noasan ]; then \
- mkdir noasan; \
-   else true; fi
-   touch stamp-noasandir
+stamp-noasan-ofiles: $(CFILES:%=$(srcdir)/%)
+   [ -d noasan ] && : || mkdir noasan
+   $(MAKE) $(FLAGS_TO_PASS) $(TARGETLIB_NOASAN_OFILES)
+   touch $@
 
 .PHONY: all etags tags ls clean stage1 stage2
 
@@ -444,7 +443,7 @@ maint-deps :
 mostlyclean: mostlyclean-subdir
-rm -rf *.$(objext) pic noasan core errs \#* *.E a.out
-rm -f errors dummy config.h stamp-*
-   -rm -f $(CONFIG_H) stamp-picdir stamp-noasandir
+   -rm -f $(CONFIG_H)
-rm -f libiberty.aux libiberty.cp libiberty.cps libiberty.fn 
libiberty.ky
-rm -f libiberty.log libiberty.tmp libiberty.tps libiberty.pg
-rm -f libiberty.pgs libiberty.toc libiberty.tp libiberty.tpl 
libiberty.vr
@@ -501,9 +500,6 @@ maintainer-clean-subdir: config.h
  cd $$dir && $(MAKE) $(FLAGS_TO_PASS) $$target; \
done
 
-$(REQUIRED_OFILES) $(EXTRA_OFILES) $(LIBOBJS): stamp-picdir stamp-noasandir
-$(CONFIGURED_OFILES): stamp-picdir stamp-noasandir
-
 # Don't export variables to the environment, in order to not confuse
 # configure.
 .NOEXPORT:
@@ -511,1200 +507,1708 @@ $(CONFIGURED_OFILES): stamp-picdir stamp-noasandir
 # The dependencies in the remainder of this file are automatically
 # generated by "make maint-deps".  Manual edits will be lost.
 
-./_doprnt.$(objext): $(srcdir)/_doprnt.c config.h $(INCDIR)/ansidecl.h \
-   $(INCDIR)/safe-ctype.h
-   if [ x"$(PICFLAG)" != x ]; the

Re: [PATCH, RFC] fortran [was Re: #pragma GCC unroll support]

2015-05-28 Thread Bernhard Reutner-Fischer
On 3 February 2015 at 01:07, Mike Stump  wrote:
> On Feb 2, 2015, at 3:22 PM, Bernhard Reutner-Fischer  
> wrote:
>> Untested draft patch
>
> I looked it over, seems to slot in nicely.
>
>
> +   gfc_error ("% directive does not commence a loop at %C”);
>
> So, don’t like commence here.

Does anybody have a better suggestion?

directive not at the start of a loop at %C
directive not followed by a loop at %C

dunno..

Mike, did you tweak the one or two things you got from the reviews
yet? ISTM your
main patch was not OKed yet nor installed.

thanks,


Re: [PATCH] Fix PR66142

2015-05-28 Thread Andreas Schwab
Richard Biener  writes:

>   * gcc.dg/tree-ssa/ssa-fre-44.c: New testcase.

On ia64:

$ grep -c "return 3.0" ssa-fre-44.c.035t.fre1 
3
$ tail ssa-fre-44.c.035t.fre1
  _19 = _8->u.x;
  _23 = _8->u.y;
  f_24 = _19 + _23;
  bar (&p);
  p ={v} {CLOBBER};
  return f_24;

}



Andreas.

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."


[gomp4, committed] Remove superfluous main in kernels-loop-n.c

2015-05-28 Thread Tom de Vries

Hi,

this patch removes a superfluous main function from a test-case.

Committed.

Thanks,
- Tom
Remove superfluous main in kernels-loop-n.c

2015-05-27  Tom de Vries  

	* c-c++-common/goacc/kernels-loop-n.c (main): Remove.

diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
index 7bf744e..5f7c1df6 100644
--- a/gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
@@ -7,7 +7,7 @@
 #define N ((1024 * 512) + 1)
 #define COUNTERTYPE unsigned int
 
-static int __attribute__((noinline,noclone))
+int
 foo (COUNTERTYPE n)
 {
   unsigned int *__restrict a;
@@ -41,8 +41,3 @@ foo (COUNTERTYPE n)
   return 0;
 }
 
-int
-main (void)
-{
-  return foo (N);
-}
-- 
1.9.1



[gomp4, committed] Enable parallelization of kernels-loop-n.c

2015-05-28 Thread Tom de Vries

Hi,

I've committed this patch.

It enables parallelization of the kernels-loop-n.c testcase.

This is now possible due to the commit of the fix for PR65637 to the 
gomp-4_0-branch ( 
https://gcc.gnu.org/ml/gcc-patches/2015-05/msg01569.html ).


Thanks,
- Tom
Enable parallelization of kernels-loop-n.c

2015-05-27  Tom de Vries  

	* tree-parloops.c (parallelize_loops): Remove checks limiting type of
	loops allowed.

	* c-c++-common/goacc/kernels-loop-n.c: Check for parallelization.

diff --git a/gcc/testsuite/ChangeLog.gomp b/gcc/testsuite/ChangeLog.gomp
index a5f1167..9b657fb 100644
--- a/gcc/testsuite/ChangeLog.gomp
+++ b/gcc/testsuite/ChangeLog.gomp
@@ -1,5 +1,9 @@
 2015-05-27  Tom de Vries  
 
+	* c-c++-common/goacc/kernels-loop-n.c: Check for parallelization.
+
+2015-05-27  Tom de Vries  
+
 	* c-c++-common/goacc/kernels-loop-n.c (main): Remove.
 
 2015-05-15  Cesar Philippidis  
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
index 5f7c1df6..d227786 100644
--- a/gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c
@@ -1,6 +1,7 @@
 /* { dg-additional-options "-O2" } */
 /* { dg-additional-options "-ftree-parallelize-loops=32" } */
-/* TODO: parallelize this example.  */
+/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */
+/* { dg-additional-options "-fdump-tree-optimized" } */
 
 #include 
 
@@ -41,3 +42,14 @@ foo (COUNTERTYPE n)
   return 0;
 }
 
+/* Check that only one loop is analyzed, and that it can be parallelized.  */
+/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops_oacc_kernels" } } */
+/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */
+
+/* Check that the loop has been split off into a function.  */
+/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*._omp_fn.0" 1 "optimized" } } */
+
+/* { dg-final { scan-tree-dump-times "(?n)pragma omp target oacc_parallel.*num_gangs\\(32\\)" 1 "parloops_oacc_kernels" } } */
+
+/* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index f5bc8b3..e10179d 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -2273,24 +2273,6 @@ parallelize_loops (bool oacc_kernels_p)
 	  if (loop->inner)
 	continue;
 
-	  gcc_assert (single_succ_p (region_entry));
-	  basic_block first = single_succ (region_entry);
-
-	  /* TODO: Allow conditional loop entry.  This test triggers when the
-	 loop bound is not known at compile time.  */
-	  if (!single_succ_p (first))
-	continue;
-
-	  /* TODO: allow more complex loops.  */
-	  if (single_exit (loop) == NULL)
-	continue;
-
-	  /* TODO: Allow other code than a single loop inside a kernels
-	 region.  */
-	  if (loop->header != single_succ (first)
-	  || single_exit (loop)->dest != region_exit)
-	continue;
-
 	  if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file,
 		 "Trying loop %d with header bb %d in oacc kernels region\n",
-- 
1.9.1



Re: [PATCH 13/14][ARM/AArch64 testsuite] Use gcc-dg-runtest in advsimd-intrinsics.exp

2015-05-28 Thread Alan Lawrence

Christophe Lyon wrote:

On 26 May 2015 at 18:25, Alan Lawrence  wrote:

I don't see this symptom - I am able to execute such subsets with either my,
or Sandra's, advsimd-intrinsics.exp.


I didn't try to run with your patch, I thought it was an oversight of yours.

Sorry, indeed I've just checked that gcc-dg-runtest includes the filter.


Is it that you have to check runtest_file_p because you are setting
gcc_parallel_test_enable to 0?

I'm doing more testing now, but I think I can drop my advsimd-intrinsics.exp
changes altogether; I'll post an updated patch series shortly.

In the meantime I'm curious as to why you found the gcc_parallel_test_enable
necessary? (And is it safe to reset it to 1 afterwards, rather than to a
saved value?)

See https://gcc.gnu.org/ml/gcc/2014-10/msg00081.html


So after working through the differences between Sandra's and my patch, I find 
the existing advsimd-intrinsics.exp achieves pretty much the same thing, and 
preserves the same list of test variants (e.g. the -Og -g from 
set-torture-options which I had removed).


However, I've tried testing advsimd-intrinsics.exp (both the whole thing, and 
individual tests using RUNTESTFLAGS) with and without this hunk:


@@ -57,20 +57,7 @@ set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTI
 set additional_flags [add_options_for_arm_neon ""]

 # Main loop.
-foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.c]] {
-# If we're only testing specific files and this isn't one of them, skip it.
-if ![runtest_file_p $runtests $src] then {
-   continue
-}
-
-# runtest_file_p is already run above, and the code below can run
-# runtest_file_p again, make sure everything for this test is
-# performed if the above runtest_file_p decided this runtest
-# instance should execute the test
-gcc_parallel_test_enable 0
-gcc-dg-runtest $src "" $additional_flags
-gcc_parallel_test_enable 1
-}
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] "" 
${additional_flags}


and find exactly the same tests are run and pass. My hypothesis is thus that you 
only need the explicit loop, manual checking of runtest_file_p, and 
gcc_parallel_test_enable, in order to do *both* c-torture-execute *and* 
gcc-dg-runtest; since we are now only doing the latter, this is unnecessary. 
Does that make sense? (If you agree, I'll propose that as a standalone cleanup 
patch.)


Cheers, Alan



Re: [Patch AArch64] PR target/66200 - gcc / libstdc++ TLC for weak memory models.

2015-05-28 Thread James Greenhalgh
On Wed, May 20, 2015 at 02:58:09PM +0100, Ramana Radhakrishnan wrote:
> Hi,
> 
>   Someone privately pointed out that the ARM and AArch64 ports do not 
> define TARGET_RELAXED_ORDERING given that the architecture(s) mandates a 
> weak memory model. This patch fixes it for AArch64, the ARM patch 
> follows in due course after appropriate testing.
> 
> I will also note that we can define __test_and_acquire as well as 
> __set_and_release and I'm toying with a follow-up patch for the same.
> 
> Also it may make sense to consider changing the defaults to a safer 
> form, or indeed forcing ports to define some of this rather than 
> allowing for silent wrong code issues. However I'm not about to do so in 
> the context of this patch.
> 
> Bootstrapped and regression tested on aarch64-none-linux-gnu with no 
> regressions.
> 
> Ok to apply to trunk and all release branches ?
> 
> gcc/
> 
> PR target/66200
> 
> * config/aarch64/aarch64.c (TARGET_RELAXED_ORDERING): Define
> 
> libstdc++-v3/
> 
> PR target/66200
> 
> * configure.host (host_cpu): Add aarch64 case.
> * config/cpu/aarch64/atomic_word.h: New file

This is OK for trunk and looks serious enough for backport to the
release branches.

Though,

> +// This is necessary iff TARGET_RELAXED_ORDERING is defined in tm.h.

this comment had me looking for uses of TARGET_RELAXED_ORDERING as a macro
rather than a hook, and left... But fixing that (here and in the
template) is not essential for this patch, or for backporting.

Thanks,
James

> P.S.  It's interesting to note that ia64 doesn't define the barriers 
> which appear to be used in a number of other places than just the 
> constructor guard functions (probably wrongly on the assumption that one 
> doesn't need the barriers elsewhere). I suspect other architectures like 
> MIPS may also be affected by this.

> commit 414345c424fa020717c6c3083089cd987f3032db
> Author: Ramana Radhakrishnan 
> Date:   Wed May 20 13:55:44 2015 +0100
> 
> Add relaxed memory ordering cases.
> 
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 7f0cc0d..273aa06 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -11644,6 +11644,9 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
>  #undef TARGET_SCHED_FUSION_PRIORITY
>  #define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
>  
> +#undef TARGET_RELAXED_ORDERING
> +#define TARGET_RELAXED_ORDERING true
> +
>  struct gcc_target targetm = TARGET_INITIALIZER;
>  
>  #include "gt-aarch64.h"
> diff --git a/libstdc++-v3/config/cpu/aarch64/atomic_word.h 
> b/libstdc++-v3/config/cpu/aarch64/atomic_word.h
> new file mode 100644
> index 000..4afe6ed
> --- /dev/null
> +++ b/libstdc++-v3/config/cpu/aarch64/atomic_word.h
> @@ -0,0 +1,44 @@
> +// Low-level type for atomic operations -*- C++ -*-
> +
> +// Copyright (C) 2015 Free Software Foundation, Inc.
> +//
> +// This file is part of the GNU ISO C++ Library.  This library is free
> +// software; you can redistribute it and/or modify it under the
> +// terms of the GNU General Public License as published by the
> +// Free Software Foundation; either version 3, or (at your option)
> +// any later version.
> +
> +// This library is distributed in the hope that it will be useful,
> +// but WITHOUT ANY WARRANTY; without even the implied warranty of
> +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +// GNU General Public License for more details.
> +
> +// Under Section 7 of GPL version 3, you are granted additional
> +// permissions described in the GCC Runtime Library Exception, version
> +// 3.1, as published by the Free Software Foundation.
> +
> +// You should have received a copy of the GNU General Public License and
> +// a copy of the GCC Runtime Library Exception along with this program;
> +// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
> +// <http://www.gnu.org/licenses/>.
> +
> +/** @file atomic_word.h
> + *  This file is a GNU extension to the Standard C++ Library.
> + */
> +
> +#ifndef _GLIBCXX_ATOMIC_WORD_H
> +#define _GLIBCXX_ATOMIC_WORD_H   1
> +
> +
> +typedef int _Atomic_word;
> +
> +// This one prevents loads from being hoisted across the barrier;
> +// in other words, this is a Load-Load acquire barrier.
> +// This is necessary iff TARGET_RELAXED_ORDERING is defined in tm.h.
> +#define _GLIBCXX_READ_MEM_BARRIER __asm __volatile ("dmb ishld":::"memory")
> +
> +// This one prevents stores from being sunk across the barrier; in other
> +// words, a Store-Store release barrier.
> +#define _GLIBCXX_WRITE_MEM_BARRIER __asm __volatile ("dmb ishst":::"memory")
> +
> +#endif
> diff --git a/libstdc++-v3/configure.host b/libstdc++-v3/configure.host
> index a349ce3..42a45d9 100644
> --- a/libstdc++-v3/configure.host
> +++ b/libstdc++-v3/configure.host
> @@ -153,6 +153,9 @@ esac
>  # Most can just use generic.
>  # THIS TABLE IS SORTED.  KEEP IT THAT WAY.
>  case "${host_cpu}" in
> +  aarch64*)
> +   

Re: [Patch AArch64] PR target/66200 - gcc / libstdc++ TLC for weak memory models.

2015-05-28 Thread James Greenhalgh
On Thu, May 21, 2015 at 09:54:19AM +0100, Ramana Radhakrishnan wrote:
> And here's an additional patch for the testsuite which was missed in the 
> original posting.
> 
> This is a testism that's testing code generation as per 
> TARGET_RELAXED_ORDERING being false and therefore needs to be adjusted 
> as attached.
> 
> Ramana
> 
> PR target/66200
> * g++.dg/abi/aarch64_guard1.C: Adjust testcase.

OK.

Thanks,
James

> diff --git a/gcc/testsuite/g++.dg/abi/aarch64_guard1.C 
> b/gcc/testsuite/g++.dg/abi/aarch64_guard1.C
> index ca1778b..e78f93c 100644
> --- a/gcc/testsuite/g++.dg/abi/aarch64_guard1.C
> +++ b/gcc/testsuite/g++.dg/abi/aarch64_guard1.C
> @@ -13,5 +13,4 @@ int *foo ()
>  }
>  
>  // { dg-final { scan-assembler _ZGVZ3foovE1x,8,8 } }
> -// { dg-final { scan-tree-dump "_ZGVZ3foovE1x & 1" "original" } }
>  // { dg-final { cleanup-tree-dump "original" } }



Re: [PATCH] Fix PR66142

2015-05-28 Thread Richard Biener
On Wed, 27 May 2015, Kyrill Tkachov wrote:

> Hi Richard,
> 
> On 26/05/15 14:54, Richard Biener wrote:
> > The following fixes the testcase in PR66142
> > 
> > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
> > 
> > Richard.
> > 
> > 2015-05-26  Richard Biener  
> > 
> > PR tree-optimization/66142
> > * tree-ssa-sccvn.c (vn_reference_lookup_3): Manually compare
> > MEM_REFs for the same base address.
> > 
> > * gcc.dg/tree-ssa/ssa-fre-44.c: New testcase.
> > 
> > Index: gcc/tree-ssa-sccvn.c
> > ===
> > --- gcc/tree-ssa-sccvn.c(revision 223574)
> > +++ gcc/tree-ssa-sccvn.c(working copy)
> > @@ -1894,7 +1894,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree
> > size2 = lhs_ref.size;
> > maxsize2 = lhs_ref.max_size;
> > if (maxsize2 == -1
> > - || (base != base2 && !operand_equal_p (base, base2, 0))
> > + || (base != base2
> > + && (TREE_CODE (base) != MEM_REF
> > + || TREE_CODE (base2) != MEM_REF
> > + || TREE_OPERAND (base, 0) != TREE_OPERAND (base2, 0)
> > + || !tree_int_cst_equal (TREE_OPERAND (base, 1),
> > > + TREE_OPERAND (base2, 1))))
> >   || offset2 > offset
> >   || offset2 + size2 < offset + maxsize)
> > return (void *)-1;
> > Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c
> > ===
> > --- gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c  (revision 0)
> > +++ gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-44.c  (working copy)
> > @@ -0,0 +1,62 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O -fdump-tree-fre1" } */
> > +
> > +struct A { float x, y; };
> > +struct B { struct A u; };
> > +void bar (struct A *);
> > +
> > +float
> > +f1 (struct B *x, int y)
> > +{
> > +  struct A p;
> > +  p.x = 1.0f;
> > +  p.y = 2.0f;
> > +  struct A *q = &x[y].u;
> > +  *q = p;
> > +  float f = x[y].u.x + x[y].u.y;
> > +  bar (&p);
> > +  return f;
> > +}
> > +
> > +float
> > +f2 (struct B *x, int y)
> > +{
> > +  struct A p;
> > +  p.x = 1.0f;
> > +  p.y = 2.0f;
> > +  x[y].u = p;
> > +  float f = x[y].u.x + x[y].u.y;
> > +  bar (&p);
> > +  return f;
> > +}
> > +
> > +float
> > +f3 (struct B *x, int y)
> > +{
> > +  struct A p;
> > +  p.x = 1.0f;
> > +  p.y = 2.0f;
> > +  struct A *q = &x[y].u;
> > +  __builtin_memcpy (&q->x, &p.x, sizeof (float));
> > +  __builtin_memcpy (&q->y, &p.y, sizeof (float));
> > +  *q = p;
> > +  float f = x[y].u.x + x[y].u.y;
> > +  bar (&p);
> > +  return f;
> > +}
> > +
> > +float
> > +f4 (struct B *x, int y)
> > +{
> > +  struct A p;
> > +  p.x = 1.0f;
> > +  p.y = 2.0f;
> > +  __builtin_memcpy (&x[y].u.x, &p.x, sizeof (float));
> > +  __builtin_memcpy (&x[y].u.y, &p.y, sizeof (float));
> > +  float f = x[y].u.x + x[y].u.y;
> > +  bar (&p);
> > +  return f;
> > +}
> 
> I see this test failing on arm-none-eabi. In particular, the f4 dump is the
> only one
> that doesn't contain "return 3.0". Instead it is:
> f4 (struct B * x, int y)
> {
>   float f;
>   struct A p;
>   unsigned int y.3_5;
>   unsigned int _6;
>   struct B * _8;
>   float * _9;
>   float * _14;
>   float _19;
>   float _23;
> 
>   :
>   p.x = 1.0e+0;
>   p.y = 2.0e+0;
>   y.3_5 = (unsigned int) y_4(D);
>   _6 = y.3_5 * 8;
>   _8 = x_7(D) + _6;
>   _9 = &_8->u.x;
>   __builtin_memcpy (_9, &p.x, 4);
>   _14 = &_8->u.y;
>   __builtin_memcpy (_14, &p.y, 4);
>   _19 = _8->u.x;
>   _23 = _8->u.y;
>   f_24 = _19 + _23;
>   bar (&p);
>   p ={v} {CLOBBER};
>   return f_24;
> 
> }
> 
> Thanks,
> Kyrill

Thanks - the following patch fixes this (tested with a cross).  It
also removes a spurious aggregate assignment from f3 which makes
it fail without the patch as well (as expected).

Bootstrap / regtest in progress on x86_64-unknown-linux-gnu.

Richard.

2015-05-28  Richard Biener  

PR tree-optimization/66142
* tree-ssa-sccvn.c (vn_reference_lookup_3): Handle non-GIMPLE
values better in memcpy destination handling.  Handle non-aliasing
we discover here.

* gcc.dg/tree-ssa/ssa-fre-44.c: Fixup.

Index: gcc/tree-ssa-sccvn.c
===
*** gcc/tree-ssa-sccvn.c(revision 223802)
--- gcc/tree-ssa-sccvn.c(working copy)
*** vn_reference_lookup_3 (ao_ref *ref, tree
*** 2028,2034 
lhs = gimple_call_arg (def_stmt, 0);
lhs_offset = 0;
if (TREE_CODE (lhs) == SSA_NAME)
!   lhs = SSA_VAL (lhs);
if (TREE_CODE (lhs) == ADDR_EXPR)
{
  tree tem = get_addr_base_and_unit_offset (TREE_OPERAND (lhs, 0),
--- 2028,2043 
lhs = gimple_call_arg (def_stmt, 0);
lhs_offset = 0;
if (TREE_CODE (lhs) == SSA_NAME)
!   {
! lhs = SSA_VAL (lhs);
! if (TREE_CODE (lhs) == SSA_NAME)
!   {
! gimple def_stmt = SSA_NAME_DEF_STMT (lhs)

Statically-allocated objects with non-trivial ctors (was Re: [PATCH 33/35] Change use to type-based pool allocator in ira-color.c.)

2015-05-28 Thread David Malcolm
On Wed, 2015-05-27 at 15:56 +0200, mliska wrote:
> gcc/ChangeLog:
> 
> 2015-04-30  Martin Liska  
> 
>   * ira-color.c (init_update_cost_records): Use new type-based pool 
> allocator.
>   (get_update_cost_record): Likewise.
>   (free_update_cost_record_list): Likewise.
>   (finish_update_cost_records): Likewise.
>   (initiate_cost_update): Likewise.
> ---
>  gcc/ira-color.c | 19 +--
>  1 file changed, 5 insertions(+), 14 deletions(-)
> 
> diff --git a/gcc/ira-color.c b/gcc/ira-color.c
> index 4750714..4aec98e 100644
> --- a/gcc/ira-color.c
> +++ b/gcc/ira-color.c
> @@ -1166,16 +1166,8 @@ setup_profitable_hard_regs (void)
> allocnos.  */
>  
>  /* Pool for update cost records.  */
> -static alloc_pool update_cost_record_pool;
> -
> -/* Initiate update cost records.  */
> -static void
> -init_update_cost_records (void)
> -{
> -  update_cost_record_pool
> -= create_alloc_pool ("update cost records",
> -  sizeof (struct update_cost_record), 100);
> -}
> +static pool_allocator update_cost_record_pool
> +  ("update cost records", 100);

Am I right in thinking that this is a statically-allocated object with a
non-trivial constructor?  i.e. that this constructor has to run before
"main" is entered?

Do our coding guidelines allow for this?  (I've been burned by this
before, on a buggy C++ runtime that didn't manage to support these).
I'm a little nervous about this, touching global state before
"main" (e.g. from the point-of-view of the JIT), though I don't know yet
if this is just a gut reaction, or if there's a valid concern here (I'm
officially on holiday this week, so I haven't had a chance to dig deeply
into these patches yet, sorry).

[...snip...]

> @@ -1264,7 +1256,6 @@ initiate_cost_update (void)
>  = (struct update_cost_queue_elem *) ira_allocate (size);
>memset (update_cost_queue_elems, 0, size);
>update_cost_check = 0;
> -  init_update_cost_records ();
>  }

(for reference, this is where the manually-coded initialization call was
made)

Hope this is constructive
Dave



[PATCH] Simple fix to enhance outer-loop vectorization.

2015-05-28 Thread Yuri Rumyantsev
Hi All,

Here is a simple patch which removes restriction on outer-loop
vectorization -  allow references in inner-loop with zero step. This
case was found in one important benchmark.

Bootstrap and regression testing did not show any new failures.
Is it OK for trunk?

ChangeLog:
2015-05-28  Yuri Rumyantsev  

* tree-vect-data-refs.c (vect_analyze_data_ref_access): Allow
consecutive accesses within outer-loop vectorization for references
with zero step in inner-loop.

gcc/testsuite/ChangeLog:
* gcc.dg/vect/fast-math-vect-outer-1.c: New test.


patch
Description: Binary data


Re: [PATCH, RFC] New memory usage statistics infrastructure

2015-05-28 Thread Thomas Schwinge
Hi!

On Fri, 15 May 2015 16:38:40 +0200, Martin Liška  wrote:
> Following patch attempts to rewrite memory reports for GCC's internal 
> allocations
> [...]

(Got committed to trunk in r223748.)

>   * hash-map-traits.h: New file.

In that one you added a copyright/licensing header, but...

>   * mem-stats-traits.h: New file.
>   * mem-stats.h: New file.

... in these two you didn't (but should):

> --- /dev/null
> +++ b/gcc/hash-map-traits.h
> @@ -0,0 +1,104 @@
> +/* A hash map traits.
> +   Copyright (C) 2015 Free Software Foundation, Inc.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify it under
> +the terms of [...]

> --- /dev/null
> +++ b/gcc/mem-stats-traits.h
> @@ -0,0 +1,20 @@
> +#ifndef GCC_MEM_STATS_TRAITS_H
> +#define GCC_MEM_STATS_TRAITS_H
> +
> +/* Memory allocation origin.  */
> +enum mem_alloc_origin
> +{
> +  HASH_TABLE,
> +  HASH_MAP,
> +  HASH_SET,
> +  VEC,
> +  BITMAP,
> +  GGC,
> +  MEM_ALLOC_ORIGIN_LENGTH
> +};
> +
> +/* Verbose names of the memory allocation origin.  */
> +static const char * mem_alloc_origin_names[] = { "Hash tables", "Hash maps", 
> "Hash sets",
> +  "Heap vectors", "Bitmaps", "GGC memory" };
> +
> +#endif // GCC_MEM_STATS_TRAITS_H

> --- /dev/null
> +++ b/gcc/mem-stats.h
> @@ -0,0 +1,535 @@
> +#ifndef GCC_MEM_STATS_H
> +#define GCC_MEM_STATS_H
> +
> +#include "hash-map-traits.h"
> +[...]


Grüße,
 Thomas


signature.asc
Description: PGP signature


Re: [patch] testsuite enable PIE tests on FreeBSD

2015-05-28 Thread Thomas Schwinge
Hi!

On Wed, 20 May 2015 14:30:38 -0600, Jeff Law  wrote:
> On 05/20/2015 11:04 AM, Andreas Tobler wrote:
> > the attached patch enables some PIE tests on FreeBSD.

> Wouldn't it be better to remove the target selector and instead add:
> 
> /* { dg-require-effective-target pie } */
> 
> In each of those tests?

(Got committed to trunk in r223498.)  Thanks!  I wanted to suggest
something along the same lines, because:

> While the net effect is the same today, it means there's only one place 
> to change if another x86 target gains PIE support in the future.

GNU Hurd got it, too.  :-)


Grüße,
 Thomas


signature.asc
Description: PGP signature


[gomp4] Expand OpenACC thread builtins inline

2015-05-28 Thread Julian Brown
For partitioned loops, we're currently calling library functions (in
libgcc) to determine the cardinality of the set of threads a particular
loop is distributed over (given a set of gang/worker/vector toggles),
and the index of the current thread within that set.

This patch reimplements those two functions in terms of the
(PTX-specific!) builtins that Bernd has recently added in order to
implement vector-single/worker-single predication, which expand
directly to machine instructions on the target (or to constant zero/one
on the host). It also makes use of the same "gwv" bitfields that are set
up by that new code.

The previous BUILT_IN_GOACC_GET_THREAD_NUM and
BUILT_IN_GOACC_GET_NUM_THREADS builtins are removed entirely.

This works reasonably well, but there are some regressions caused by
middle-end optimisers having extra freedom to manipulate the CFG in
ways that PTX cannot support without the "optimisation barrier" of the
calls to the thread builtins being present. This will be addressed by a
follow-on patch.

Pre-approved for gomp4, but I'll wait for comments on the follow-on
patch before applying so as not to leave the branch in a "broken" state.

Thanks,

Julian

ChangeLog

gcc/
* builtins.c (expand_oacc_builtin): Return const1_rtx for
ntid/nctaid builtins when the associated patterns are not present.
* omp-builtins.def (BUILT_IN_GOACC_GET_THREAD_NUM)
(BUILT_IN_GOACC_GET_NUM_THREADS): Remove.
* omp-low.c (struct omp_for_data): Remove gang, worker, vector
fields.
(extract_omp_for_data): Don't initialise deleted gang, worker,
vector fields.
(expand_oacc_get_num_threads, expand_oacc_get_thread_num): New
functions.
(lower_reduction_clauses): Use above functions.
(expand_omp_for_static_nochunk): Likewise.
(expand_omp_for_static_chunk): Likewise.
commit 1be8ada44a9f91d2eba16ef1f81243707647f237
Author: Julian Brown 
Date:   Fri May 15 03:20:42 2015 -0700

Inlined OpenACC thread builtins.

diff --git a/gcc/builtins.c b/gcc/builtins.c
index ebd4b4a..cd51821 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -5964,8 +5964,8 @@ expand_oacc_builtin (enum built_in_function fcode, tree exp, rtx target)
 case BUILT_IN_GOACC_NTID:
 #ifdef HAVE_oacc_ntid
   icode = CODE_FOR_oacc_ntid;
-  result = const1_rtx;
 #endif
+  result = const1_rtx;
   break;
 case BUILT_IN_GOACC_TID:
 #ifdef HAVE_oacc_tid
@@ -5975,8 +5975,8 @@ expand_oacc_builtin (enum built_in_function fcode, tree exp, rtx target)
 case BUILT_IN_GOACC_NCTAID:
 #ifdef HAVE_oacc_nctaid
   icode = CODE_FOR_oacc_nctaid;
-  result = const1_rtx;
 #endif
+  result = const1_rtx;
   break;
 case BUILT_IN_GOACC_CTAID:
 #ifdef HAVE_oacc_ctaid
diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def
index ac1f802..47d9e45 100644
--- a/gcc/omp-builtins.def
+++ b/gcc/omp-builtins.def
@@ -69,10 +69,6 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_NCTAID, "GOACC_nctaid",
 		   BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_CTAID, "GOACC_ctaid",
 		   BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST)
-DEF_GOACC_BUILTIN (BUILT_IN_GOACC_GET_THREAD_NUM, "GOACC_get_thread_num",
-		   BT_FN_INT_INT_INT_INT, ATTR_NOTHROW_LEAF_LIST)
-DEF_GOACC_BUILTIN (BUILT_IN_GOACC_GET_NUM_THREADS, "GOACC_get_num_threads",
-		   BT_FN_INT_INT_INT_INT, ATTR_NOTHROW_LEAF_LIST)
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_GET_GANGLOCAL_PTR, "GOACC_get_ganglocal_ptr",
 		   BT_FN_PTR, ATTR_NOTHROW_LEAF_LIST)
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DEVICEPTR, "GOACC_deviceptr",
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index b114887..f82247b 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -263,7 +263,6 @@ struct omp_for_data
   tree chunk_size;
   gomp_for *for_stmt;
   tree pre, iter_type;
-  tree gang, worker, vector;
   int collapse;
   bool have_nowait, have_ordered;
   enum omp_clause_schedule_kind sched_kind;
@@ -749,16 +748,6 @@ extract_omp_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
   gcc_assert (fd->chunk_size == NULL_TREE);
   fd->chunk_size = build_int_cst (TREE_TYPE (fd->loop.v), 1);
 }
-
-  /* Extract the OpenACC gang, worker and vector clauses.  */
-  t = find_omp_clause (gimple_omp_for_clauses (for_stmt), OMP_CLAUSE_GANG);
-  fd->gang = (t == NULL_TREE) ? integer_zero_node : integer_one_node;
-
-  t = find_omp_clause (gimple_omp_for_clauses (for_stmt), OMP_CLAUSE_WORKER);
-  fd->worker = (t == NULL_TREE) ? integer_zero_node : integer_one_node;
-
-  t = find_omp_clause (gimple_omp_for_clauses (for_stmt), OMP_CLAUSE_VECTOR);
-  fd->vector = (t == NULL_TREE) ? integer_zero_node : integer_one_node;
 }
 
 
@@ -4919,6 +4908,159 @@ is_atomic_compatible_reduction (tree var, omp_context *ctx)
   return true;
 }
 
+
+/* Find the total number of threads used by a region partitioned by
+   GWV_BITS.  Setup code required for the calculation is added to SEQ.  Note
+   that this is currently used from both OMP-lowering and OMP-expansion phases,
+   and uses b

Re: [PATCH 13/14][ARM/AArch64 testsuite] Use gcc-dg-runtest in advsimd-intrinsics.exp

2015-05-28 Thread Christophe Lyon
On 28 May 2015 at 12:22, Alan Lawrence  wrote:
> Christophe Lyon wrote:
>>
>> On 26 May 2015 at 18:25, Alan Lawrence  wrote:
>>>
>>> I don't see this symptom - I am able to execute such subsets with either
>>> my,
>>> or Sandra's, advsimd-intrinsics.exp.
>>
>>
>> I didn't try to run with your patch, I thought it was an oversight of
>> yours.
>>
>> Sorry, indeed I've just checked that gcc-dg-runtest includes the filter.
>>
>>> Is it that you have to check runtest_file_p because you are setting
>>> gcc_parallel_test_enable to 0?
>>>
>>> I'm doing more testing now, but I think I can drop my
>>> advsimd-intrinsics.exp
>>> changes altogether; I'll post an updated patch series shortly.
>>>
>>> In the meantime I'm curious as to why you found the
>>> gcc_parallel_test_enable
>>> necessary? (And is it safe to reset it to 1 afterwards, rather than to a
>>> saved value?)
>>
>> See https://gcc.gnu.org/ml/gcc/2014-10/msg00081.html
>
>
> So after working through the differences between Sandra's and my patch, I
> find the existing advsimd-intrinsics.exp achieves pretty much the same
> thing, and preserves the same list of test variants (e.g. the -Og -g from
> set-torture-options which I had removed).
>
> However, I've tried testing advsimd-intrinsics.exp (both the whole thing,
> and individual tests using RUNTESTFLAGS) with and without this hunk:
>
> @@ -57,20 +57,7 @@ set-torture-options $C_TORTURE_OPTIONS {{}}
> $LTO_TORTURE_OPTI
>  set additional_flags [add_options_for_arm_neon ""]
>
>  # Main loop.
> -foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.c]] {
> -# If we're only testing specific files and this isn't one of them, skip
> it.
> -if ![runtest_file_p $runtests $src] then {
> -   continue
> -}
> -
> -# runtest_file_p is already run above, and the code below can run
> -# runtest_file_p again, make sure everything for this test is
> -# performed if the above runtest_file_p decided this runtest
> -# instance should execute the test
> -gcc_parallel_test_enable 0
> -gcc-dg-runtest $src "" $additional_flags
> -gcc_parallel_test_enable 1
> -}
> +gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] ""
> ${additional_flags}
>
> and find exactly the same tests are run and pass. My hypothesis is thus that
> you only need the explicit loop, manual checking of runtest_file_p, and
> gcc_parallel_test_enable, in order to do *both* c-torture-execute *and*
> gcc-dg-runtest; since we are now only doing the latter, this is unnecessary.
> Does that make sense? (If you agree, I'll propose that as a standalone
> cleanup patch.)
>

Indeed I think you are right. Since we no longer call
c-torture-execute, we no longer need to call runtest_file_p here.
Having only one remaining call to runtest_file_p in gcc-dg-runtest is
parallel-safe. Thanks for the cleanup.

Christophe.

> Cheers, Alan
>


Re: [PATCH] Simple fix to enhance outer-loop vectorization.

2015-05-28 Thread Richard Biener
On Thu, May 28, 2015 at 1:00 PM, Yuri Rumyantsev  wrote:
> Hi All,
>
> Here is a simple patch which removes restriction on outer-loop
> vectorization -  allow references in inner-loop with zero step. This
> case was found in one important benchmark.
>
> Bootstrap and regression testing did not show any new failures.
> Is it OK for trunk?
>
> ChangeLog:
> 2015-05-28  Yuri Rumyantsev  
>
> * tree-vect-data-refs.c (vect_analyze_data_ref_access): Allow
> consecutive accesses within outer-loop vectorization for references
> with zero step in inner-loop.
>
> gcc/testsuite/ChangeLog:
> * gcc.dg/vect/fast-math-vect-outer-1.c: New test.

Can you please add a non-omp-simd testcase that triggers this as well and that
is a runtime testcase verifying the transform is correct?

Also please don't add to the strange testcase-name machinery but just
use { dg-additional-options "-ffast-math" }

Index: tree-vect-data-refs.c
===
--- tree-vect-data-refs.c   (revision 223653)
+++ tree-vect-data-refs.c   (working copy)
@@ -2261,7 +2261,6 @@
   return true;
 }

-
 /* Analyze the access pattern of the data-reference DR.
In case of non-consecutive accesses call vect_analyze_group_access() to
analyze groups of accesses.  */

spurious white-space change


@@ -2291,14 +2290,8 @@
   if (loop_vinfo && integer_zerop (step))

Surely the comment before this needs updating now.

 {
   GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
-  if (nested_in_vect_loop_p (loop, stmt))
-   {
- if (dump_enabled_p ())
-   dump_printf_loc (MSG_NOTE, vect_location,
-"zero step in inner loop of nest\n");
- return false;
-   }
-  return DR_IS_READ (dr);
+  if (!nested_in_vect_loop_p (loop, stmt))
+   return DR_IS_READ (dr);
 }

   if (loop && nested_in_vect_loop_p (loop, stmt))

so what happens after the patch?  It would be nice to have a comment
explaining what happens in the nested_in_vect_loop_p case for
the case when the outer-loop step is zero and when it is not zero.

In particular as you don't need any code generation changes - this hints
at that you may miss something ;)

Otherwise of course the patch is ok - lifting restrictions is good.

Thanks,
Richard.


Re: acc_on_device for device_type_host_nonshm

2015-05-28 Thread H.J. Lu
On Thu, May 21, 2015 at 4:10 AM, Jakub Jelinek  wrote:
> On Thu, May 21, 2015 at 01:02:12PM +0200, Thomas Schwinge wrote:
>> Hi!
>>
>> On Thu, 7 May 2015 19:32:26 +0100, Julian Brown  
>> wrote:
>> > Here's a new version of the patch [...]
>>
>> > OK for trunk?
>>
>> Makes sense to me (with just a request to drop the testsuite changes, see
>> below), to get the existing regressions under control.  Jakub?
>
> Ok for trunk.
>>
>> > PR libgomp/65742
>> >
>> > gcc/
>> > * builtins.c (expand_builtin_acc_on_device): Don't use open-coded
>> > sequence for !ACCEL_COMPILER.
>> >

It breaks bootstrap on x86:

https://gcc.gnu.org/ml/gcc-regression/2015-05/msg00389.html

I checked in this to fix it.

-- 
H.J.
---
Index: gcc/ChangeLog
===
--- gcc/ChangeLog (revision 223804)
+++ gcc/ChangeLog (working copy)
@@ -1,3 +1,8 @@
+2015-05-28  H.J. Lu  
+
+ * builtins.c (expand_builtin_acc_on_device): Mark parameters
+ with ATTRIBUTE_UNUSED.
+
 2015-05-28  Julian Brown  

  PR libgomp/65742
Index: gcc/builtins.c
===
--- gcc/builtins.c (revision 223804)
+++ gcc/builtins.c (working copy)
@@ -5911,7 +5911,8 @@
acceleration device (ACCEL_COMPILER conditional).  */

 static rtx
-expand_builtin_acc_on_device (tree exp, rtx target)
+expand_builtin_acc_on_device (tree exp ATTRIBUTE_UNUSED,
+  rtx target ATTRIBUTE_UNUSED)
 {
 #ifdef ACCEL_COMPILER
   if (!validate_arglist (exp, INTEGER_TYPE, VOID_TYPE))


Re: [Patch V2]: libbacktrace - add support of PE/COFF

2015-05-28 Thread Tristan Gingold
Hello,

>> #define BACKTRACE_SUPPORTS_THREADS @BACKTRACE_SUPPORTS_THREADS@
>> +
>> +/* BACKTRACE_SUPPORTS_DATA will be #defined'd as 1 if the backtrace library
>> +   also handles data symbols, 0 if not.  */
>> +
>> +#define BACKTRACE_SUPPORTS_DATA @BACKTRACE_SUPPORTS_DATA@
> 
> End users are expected to read and understand this file, so I think
> this comment is too obscure.  I suggest:
> 
> BACKTRACE_SUPPORTS_DATA will be #define'd as 1 if backtrace_syminfo
> will work for variables.  It will always work for functions.

The comment is now replaced by your wording.

> I would have thought you could distinguish relevant symbols using the
> storage class and type.  But perhaps not.

Not that easily, unfortunately.  Section names also appear as data
symbols, and furthermore linker script symbols made btest fail.
But we could revisit this issue later.

>> diff --git a/libbacktrace/filetype.awk b/libbacktrace/filetype.awk
>> index 0a656f7..37099ad 100644
>> --- a/libbacktrace/filetype.awk
>> +++ b/libbacktrace/filetype.awk
>> @@ -1,3 +1,4 @@
>> # An awk script to determine the type of a file.
>> /\177ELF\001/ { if (NR == 1) { print "elf32"; exit } }
>> /\177ELF\002/ { if (NR == 1) { print "elf64"; exit } }
>> +/\114\001/{ if (NR == 1) { print "pecoff"; exit } }
> 
> That works for 32-bit, but I think not for 64-bit.  For 64-bit I would
> expect \144\206.

Fixed.

>> +#include <windows.h>
> 
> Where is <windows.h> going to come from when building a
> cross-compiler?  I think this needs to be removed.  I see that you
> define the structs yourself, as you should, so why do you need
> <windows.h>?

Indeed, windows.h is not needed, so I have removed it.

>> +/* Read a potentially unaligned 2 byte word at P, using native endianness.  
>> */
> 
> Is there really ever a case where a 2 byte word might be misaligned?

Good remark.  I have changed the comment.

>> +/* Return true iff SYM is a defined symbol for a function.  Data symbols
>> +   are discarded because they aren't easily identified.  */
>> +
>> +static int
>> +coff_is_symbol (const b_coff_internal_symbol *isym)
>> +{
>> +  return isym->type == 0x20 && isym->sec > 0;
>> +}
> 
> Is this really right?  This seems to test for DT_FCN set, but won't a
> function returning, say, int, have type 0x24 (DT_FCN << N_TBSHFT) | T_INT?

According to MS doc, only 0x20 or 0x00 is used.  But I have changed the doc
for clarity.

> Also, the name isn't right--this is coff_is_function_symbol.

Changed.

> 
>> +  if (coff_expand_symbol (&isym, asym, sects_num, strtab, strtab_size) 
>> < 0)
>> +   {
>> + error_callback (data, "invalid coff symbol", 0);
>> + return 0;
>> +   }
> 
> That's not a very useful error message--can you be more specific?

It is now more specific (although such an error should never happen).

>> +  /* Allocate memory for symbols are strings.  */
> 
> Comment looks wrong--omit "are”?

Yes.

Here is the new version of the patch.

Regards,
Tristan.

libbacktrace/
2015-05-21  Tristan Gingold  

* pecoff.c: New file.
* Makefile.am (FORMAT_FILES): Add pecoff.c and dependencies.
* Makefile.in: Regenerate.
* filetype.awk: Detect pecoff.
* configure.ac: Define BACKTRACE_SUPPORTS_DATA on elf platforms.
Add pecoff.
* btest.c (test5): Test enabled only if BACKTRACE_SUPPORTS_DATA is
true.
* backtrace-supported.h.in (BACKTRACE_SUPPORTS_DATA): Define.
* configure: Regenerate.
* pecoff.c: New file.

commit fe0f364bf5836dea2aacb6d963c782d12c4d5561
Author: Tristan Gingold 
Date:   Thu May 21 14:29:44 2015 +0200

Add support for PE/COFF to libbacktrace.

diff --git a/libbacktrace/ChangeLog b/libbacktrace/ChangeLog
index c6604d9..139521a 100644
--- a/libbacktrace/ChangeLog
+++ b/libbacktrace/ChangeLog
@@ -1,3 +1,17 @@
+2015-05-21  Tristan Gingold  
+
+   * pecoff.c: New file.
+   * Makefile.am (FORMAT_FILES): Add pecoff.c and dependencies.
+   * Makefile.in: Regenerate.
+   * filetype.awk: Detect pecoff.
+   * configure.ac: Define BACKTRACE_SUPPORTS_DATA on elf platforms.
+   Add pecoff.
+   * btest.c (test5): Test enabled only if BACKTRACE_SUPPORTS_DATA is
+   true.
+   * backtrace-supported.h.in (BACKTRACE_SUPPORTS_DATA): Define.
+   * configure: Regenerate.
+   * pecoff.c: New file.
+
 2015-05-13  Michael Haubenwallner  
 
* Makefile.in: Regenerated with automake-1.11.6.
diff --git a/libbacktrace/Makefile.am b/libbacktrace/Makefile.am
index a93b82a..c5f0dcb 100644
--- a/libbacktrace/Makefile.am
+++ b/libbacktrace/Makefile.am
@@ -56,6 +56,7 @@ BACKTRACE_FILES = \
 
 FORMAT_FILES = \
elf.c \
+   pecoff.c \
unknown.c
 
 VIEW_FILES = \
@@ -124,6 +125,7 @@ fileline.lo: config.h backtrace.h internal.h
 mmap.lo: config.h backtrace.h internal.h
 mmapio.lo: config.h backtrace.h internal.h
 nounwind.lo: config.h internal.h
+pecoff.lo: config.h backtrace.h internal.h
 posix.lo: config.h backtrace.h internal.h
 print.lo: con

[patch] libstdc++/65352 fix ubsan errors in std::array

2015-05-28 Thread Jonathan Wakely

Unsurprisingly ubsan doesn't like referencing a null pointer.

With this change __array_traits::_S_ref is only used to access an
element, which is invalid for std::array<T, 0> anyway.

Tested powerpc64le-linux, committed to trunk.

commit 0d999cf16b8f6a0d9bbf4bfe96b29e7b73a259e4
Author: Jonathan Wakely 
Date:   Thu May 28 12:21:36 2015 +0100

	PR libstdc++/65352
	* include/std/array (__array_traits::_S_ptr): New function.
	(array::data): Use _S_ptr to avoid creating invalid reference.
	* testsuite/23_containers/array/tuple_interface/get_neg.cc: Adjust
	dg-error line numbers.
	* testsuite/23_containers/array/tuple_interface/tuple_element_neg.cc:
	likewise.

diff --git a/libstdc++-v3/include/std/array b/libstdc++-v3/include/std/array
index 429506b..24be44f 100644
--- a/libstdc++-v3/include/std/array
+++ b/libstdc++-v3/include/std/array
@@ -51,6 +51,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
   static constexpr _Tp&
   _S_ref(const _Type& __t, std::size_t __n) noexcept
   { return const_cast<_Tp&>(__t[__n]); }
+
+  static constexpr _Tp*
+  _S_ptr(const _Type& __t) noexcept
+  { return const_cast<_Tp*>(__t); }
 };
 
  template
@@ -61,6 +65,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
  static constexpr _Tp&
  _S_ref(const _Type&, std::size_t) noexcept
  { return *static_cast<_Tp*>(nullptr); }
+
+ static constexpr _Tp*
+ _S_ptr(const _Type&) noexcept
+ { return nullptr; }
};
 
   /**
@@ -219,11 +227,11 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
 
   pointer
   data() noexcept
-  { return std::__addressof(_AT_Type::_S_ref(_M_elems, 0)); }
+  { return _AT_Type::_S_ptr(_M_elems); }
 
   const_pointer
   data() const noexcept
-  { return std::__addressof(_AT_Type::_S_ref(_M_elems, 0)); }
+  { return _AT_Type::_S_ptr(_M_elems); }
 };
 
   // Array comparisons.
diff --git a/libstdc++-v3/testsuite/23_containers/array/tuple_interface/get_neg.cc b/libstdc++-v3/testsuite/23_containers/array/tuple_interface/get_neg.cc
index 7604412..6830964 100644
--- a/libstdc++-v3/testsuite/23_containers/array/tuple_interface/get_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/array/tuple_interface/get_neg.cc
@@ -28,6 +28,6 @@ int n1 = std::get<1>(a);
 int n2 = std::get<1>(std::move(a));
 int n3 = std::get<1>(ca);
 
-// { dg-error "static assertion failed" "" { target *-*-* } 274 }
-// { dg-error "static assertion failed" "" { target *-*-* } 283 }
+// { dg-error "static assertion failed" "" { target *-*-* } 282 }
 // { dg-error "static assertion failed" "" { target *-*-* } 291 }
+// { dg-error "static assertion failed" "" { target *-*-* } 299 }
diff --git a/libstdc++-v3/testsuite/23_containers/array/tuple_interface/tuple_element_neg.cc b/libstdc++-v3/testsuite/23_containers/array/tuple_interface/tuple_element_neg.cc
index 9788053..5d75366 100644
--- a/libstdc++-v3/testsuite/23_containers/array/tuple_interface/tuple_element_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/array/tuple_interface/tuple_element_neg.cc
@@ -23,4 +23,4 @@
 
 typedef std::tuple_element<1, std::array>::type type;
 
-// { dg-error "static assertion failed" "" { target *-*-* } 322 }
+// { dg-error "static assertion failed" "" { target *-*-* } 330 }


Re: [Patch]: libbacktrace - add support of PE/COFF

2015-05-28 Thread Tristan Gingold

> On 27 May 2015, at 15:36, Jeff Law  wrote:

>> +static int
>> +coff_is_symbol (const b_coff_internal_symbol *isym)
>> +{
>> +  return isym->type == 0x20 && isym->sec > 0;
>> +}
> You probably want const or enum so that you can have a symbolic name rather 
> than 0x20 here.  It also seems like the name ought to better indicate it's 
> testing for function symbols.

Yes, this is now changed.

> It's a given  that you know COFF specifics better than I ever did, so I'm 
> comfortable assuming you got the COFF specifics right.
> 
> The overall structure of elf.c & coff.c is the same with code templates that 
> are very similar, except they work on different underlying types.  Presumably 
> there wasn't a good way to factor any of the generic looking bits out?  And 
> no, I'm not requesting you rewrite all this in BFD :-)

The dummy callback could indeed be easily shared.  For the remaining, that’s 
not so simple given the types.  Maybe we can create a ‘C class’ for symbol 
infos.

Tristan.



Re: [PATCH/RFC] Make loop-header-copying more aggressive, rerun before tree-if-conversion

2015-05-28 Thread Richard Biener
On Fri, May 22, 2015 at 5:42 PM, Alan Lawrence  wrote:
> This example which I wrote to test ifconversion, currently fails to
> if-convert or vectorize:
>
> int foo ()
> {
>   for (int i = 0; i < 32 ; i++)
> {
>   int m = (a[i] & i) ? 5 : 4;
>   b[i] = a[i] * m;
> }
> }
>
> ...because jump-threading in dom1 rearranged the loop into a form that
> neither if-conversion nor vectorization would attempt. Discussion at
> https://gcc.gnu.org/ml/gcc/2015-04/msg00343.html lead to the suggestion that
> I should rerun loop-header copying (an earlier attempt to fix ifconversion,
> https://gcc.gnu.org/ml/gcc-patches/2015-04/msg01743.html, still did not
> enable vectorization.)
>
> This patch does so (and makes slightly less conservative, to tackle the
> example above). I found I had to make this a separate pass, so that the phi
> nodes were cleaned up at the end of the pass before running
> tree_if_conversion. Also at this stage in the compiler (inside loop opts) it
> was not possible to run loop_optimizer_init+finalize, or other
> loop_optimizer data structures needed later would be deleted; hence, I have
> two nearly-but-not-quite-identical passes, the new "ch_vect" avoiding the
> init/finalize. I tried to tackle this with some C++ subclassing, which
> removes the duplication, but the result feels a little ugly; suggestions for
> any neater approach welcome.
>
> This patch causes failure of the scan-tree-dump of dom2 in
> gcc.dg/ssa/pr21417.c. This looks for jump-threading to perform an
> optimization, but no longer finds the expected line in the log - as the
> loop-header-copying phase has already done an equivalent transformation
> *before* dom2. The final CFG is thus in the desired form, but I'm not sure
> how to determine this (scanning the CFG itself is very difficult, well
> beyond what we can do with regex, requiring looking at multiple lines and
> basic blocks). Can anyone advise? [The test issue can be worked around by
> preserving the old do_while_p logic for the first header-copying pass, and
> using the new logic only for the second, but this is more awkward inside the
> compiler, which feels wrong.]
>
> Besides the new vect-ifcvt-11.c, the testsuite actually has a couple of
> other examples where this patch enables (undesired!) vectorization. I've
> dealt with these, but for the record:
> * gcc.dg/vect/slp-perm-7.c: the initialization loop in main,
> contained a check that input[i] < 200; this was already optimized out
> (because input[i] was set to i%256, where i that loop was not vectorized because:
> /work/alalaw01/oban/srcfsf/gcc/gcc/testsuite/gcc.dg/vect/slp-perm-7.c:54:3:
> note: not vectorized: latch block not empty.
> /work/alalaw01/oban/srcfsf/gcc/gcc/testsuite/gcc.dg/vect/slp-perm-7.c:54:3:
> note: bad loop form.
>
> * gcc.dg/vect/vect-strided-a-u16-i4.c: the main1() function has
> three loops; the first (initialization) has an 'if (y) abort() /* Avoid
> vectorization.  */'. However, the 'volatile int y = 0' this was meant to
> reference, is actually shadowed by a local non-volatile; the test is thus
> peeled off and absent from the body of the loop. The loop only avoided
> vectorization because of non-empty latch and bad loop form, as previous.
>
> With this patch, both those loops now have good form, hence I have fixed
> both to check a global volatile to prevent these extraneous parts from being
> vectorized.
>
> Tested with bootstrap + check-gcc on x86_64 and AArch64 (linux). As noted
> above, this causes a spurious PASS->FAIL of a scan-tree-dump test, which I'm
> unsure how to fix, but no other regressions.

Apart from Jeff's comment - the usual fix for the undesired
vectorization is to put
a __asm__ volatile (""); in the loop.

+  /* If any block in the loop has an exit edge, and code after it, it is
+ not a do-while loop.  */
+  basic_block *body = get_loop_body (loop);
+  for (unsigned i = 0; i < loop->num_nodes; i++)

wouldn't it be easier to verify that the predecessor of the loop latch
contains the (only) loop exit?

Like

   e = single_exit (loop);
   if (!e)
 return true;

   if (single_exit (loop)->pred != single_pred (loop->latch))
 return false;

?  In fact I think that even for multiple exits we want the latch predecessor
to have an exit (though the vectorizer or if-conversion don't deal with that).

Note that single_exit () only works when the loop state has
LOOPS_HAVE_RECORDED_EXITS
thus it might be easier to simply check

  FOR_EACH_EDGE (... single_pred (loop->latch)->succs ..)
 if (e->dest == loop->latch)
   ;
 else
   break;
  if (!e || !loop_exit_edge_p (loop, e))
return true;

which should work always.

Coding-style wise, can you please move the "common" pass_ch_vect::execute out
of the pass_ch_vect class?

  unsigned int res = pass_ch_vect::execute (fun);

looks ugly, as well as deriving pass_ch from pass_ch_vect.  I think pass_ch_vect
should be only executed if flag_tree_loop_vectorize is enabled.

   loop_op

Re: [Patch]: libbacktrace - add support of PE/COFF

2015-05-28 Thread Tristan Gingold

> On 28 May 2015, at 02:26, Ian Lance Taylor  wrote:

> The #include <windows.h> will break cross-compilers.  It's not OK for
> trunk until that is fixed.

I am confused by this comment, for two reasons:

- I don’t see how that would break cross-compilers.  Cross compilers
 hosted on windows are not impacted by this include when the library is
 used for the tools.  When the backtrace library is used for the target,
 pecoff is not used unless the target is windows.
 So I don’t see a case where the include breaks cross-compilers.

- If the case exists, I don’t see how to implement backtrace within
 shared libraries: I need a windows specific function to get the list
 of DLLs.

Tristan.





Re: [PATCH, RFC] fortran [was Re: #pragma GCC unroll support]

2015-05-28 Thread Mike Stump
On May 28, 2015, at 2:02 AM, Bernhard Reutner-Fischer  
wrote:
> 
> Does anybody have a better suggestion?
> 
> directive not at the start of a loop at %C
> directive not followed by a loop at %C

I prefer either of these.  I have a slight preference for the first.

> Mike, did you tweak the one or two things you got from the reviews
> yet?

Nope.

> ISTM your main patch was not OKed yet nor installed.

Been busy with work.  I’ll come back and address the nits that people pointed 
out and see if I can ping it some more and try and get the C++ bits reviewed.

Re: [PATCH 13/14][ARM/AArch64 testsuite] Use gcc-dg-runtest in advsimd-intrinsics.exp

2015-05-28 Thread Christophe Lyon
On 28 May 2015 at 13:32, Christophe Lyon  wrote:
> On 28 May 2015 at 12:22, Alan Lawrence  wrote:
>> Christophe Lyon wrote:
>>>
>>> On 26 May 2015 at 18:25, Alan Lawrence  wrote:

 I don't see this symptom - I am able to execute such subsets with either
 my,
 or Sandra's, advsimd-intrinsics.exp.
>>>
>>>
>>> I didn't try to run with your patch, I thought it was an oversight of
>>> yours.
>>>
>>> Sorry, indeed I've just checked that gcc-dg-runtest includes the filter.
>>>
 Is it that you have to check runtest_file_p because you are setting
 gcc_parallel_test_enable to 0?

 I'm doing more testing now, but I think I can drop my
 advsimd-intrinsics.exp
 changes altogether; I'll post an updated patch series shortly.

 In the meantime I'm curious as to why you found the
 gcc_parallel_test_enable
 necessary? (And is it safe to reset it to 1 afterwards, rather than to a
 saved value?)
>>>
>>> See https://gcc.gnu.org/ml/gcc/2014-10/msg00081.html
>>
>>
>> So after working through the differences between Sandra's and my patch, I
>> find the existing advsimd-intrinsics.exp achieves pretty much the same
>> thing, and preserves the same list of test variants (e.g. the -Og -g from
>> set-torture-options which I had removed).
>>
>> However, I've tried testing advsimd-intrinsics.exp (both the whole thing,
>> and individual tests using RUNTESTFLAGS) with and without this hunk:
>>
>> @@ -57,20 +57,7 @@ set-torture-options $C_TORTURE_OPTIONS {{}}
>> $LTO_TORTURE_OPTI
>>  set additional_flags [add_options_for_arm_neon ""]
>>
>>  # Main loop.
>> -foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.c]] {
>> -# If we're only testing specific files and this isn't one of them, skip
>> it.
>> -if ![runtest_file_p $runtests $src] then {
>> -   continue
>> -}
>> -
>> -# runtest_file_p is already run above, and the code below can run
>> -# runtest_file_p again, make sure everything for this test is
>> -# performed if the above runtest_file_p decided this runtest
>> -# instance should execute the test
>> -gcc_parallel_test_enable 0
>> -gcc-dg-runtest $src "" $additional_flags
>> -gcc_parallel_test_enable 1
>> -}
>> +gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] ""
>> ${additional_flags}
>>
>> and find exactly the same tests are run and pass. My hypothesis is thus that
>> you only need the explicit loop, manual checking of runtest_file_p, and
>> gcc_parallel_test_enable, in order to do *both* c-torture-execute *and*
>> gcc-dg-runtest; since we are now only doing the latter, this is unnecessary.
>> Does that make sense? (If you agree, I'll propose that as a standalone
>> cleanup patch.)
>>
>
> Indeed I think you are right. Since we no longer call
> c-torture-execute, we no longer need to call runtest_file_p here.
> Having only one remaining call to runtest_file_p in gcc-dg-runtest is
> parallel-safe. Thanks for the cleanup.
>

So in fact, except for the comment about '-w' it seems your initial
patch was mostly OK, right?

> Christophe.
>
>> Cheers, Alan
>>


[PATCH] Optimize (CST1 << A) == CST2 (PR tree-optimization/66299)

2015-05-28 Thread Marek Polacek
This PR points out that we weren't able to optimize 1 << x == 2 to just
x == 1.  This is my attempt to fix that: if we see (CST1 << A) == CST2
and CST2 is a multiple of CST1, use log2 to get rid of the shift, but
only if the result of the shift is a natural number (including zero).

If CST2 is not a multiple of CST1, then the whole expression can be
discarded, but I'd like to do that as a follow-up.
(It would help if our current match.pd grammar allowed us to use "else",
any plans on doing that?)

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2015-05-28  Marek Polacek  

PR tree-optimization/66299
* match.pd ((CST1 << A) == CST2 -> A == log2 (CST2 / CST1),
(CST1 << A) != CST2 -> A != log2 (CST2 / CST1)): New
patterns.

* gcc.dg/pr66299-1.c: New test.
* gcc.dg/pr66299-2.c: New test.

diff --git gcc/match.pd gcc/match.pd
index abd7851..5d07a70 100644
--- gcc/match.pd
+++ gcc/match.pd
@@ -676,6 +676,19 @@ along with GCC; see the file COPYING3.  If not see
   (cmp (bit_and (lshift integer_onep @0) integer_onep) integer_zerop)
   (icmp @0 { build_zero_cst (TREE_TYPE (@0)); })))
 
+/* (CST1 << A) == CST2 -> A == log2 (CST2 / CST1)
+   (CST1 << A) != CST2 -> A != log2 (CST2 / CST1)
+   if CST2 is a multiple of CST1.  */
+(for cmp (ne eq)
+ (simplify
+  (cmp (lshift@3 INTEGER_CST@0 @1) INTEGER_CST@2)
+  (if ((TREE_CODE (@3) != SSA_NAME || has_single_use (@3))
+   && wi::multiple_of_p (@2, @0, TYPE_SIGN (type)))
+   (with {
+int shift = wi::exact_log2 (wi::div_trunc (@2, @0, TYPE_SIGN (type))); }
+   (if (shift != -1)
+(cmp @1 { build_int_cst (TREE_TYPE (@1), shift); }))
+
 /* Simplifications of conversions.  */
 
 /* Basic strip-useless-type-conversions / strip_nops.  */
diff --git gcc/testsuite/gcc.dg/pr66299-1.c gcc/testsuite/gcc.dg/pr66299-1.c
index e69de29..9d41275 100644
--- gcc/testsuite/gcc.dg/pr66299-1.c
+++ gcc/testsuite/gcc.dg/pr66299-1.c
@@ -0,0 +1,83 @@
+/* PR tree-optimization/66299 */
+/* { dg-do run } */
+/* { dg-options "-fdump-tree-original" } */
+
+void
+test1 (int x)
+{
+  if ((0 << x) != 0
+  || (1 << x) != 2
+  || (2 << x) != 4
+  || (3 << x) != 6
+  || (4 << x) != 8
+  || (5 << x) != 10
+  || (6 << x) != 12
+  || (7 << x) != 14
+  || (8 << x) != 16
+  || (9 << x) != 18
+  || (10 << x) != 20)
+__builtin_abort ();
+}
+
+void
+test2 (int x)
+{
+  if (!((0 << x) == 0
+&& (1 << x) == 4
+&& (2 << x) == 8
+&& (3 << x) == 12
+&& (4 << x) == 16
+&& (5 << x) == 20
+&& (6 << x) == 24
+&& (7 << x) == 28
+&& (8 << x) == 32
+&& (9 << x) == 36
+   && (10 << x) == 40))
+__builtin_abort ();
+}
+
+void
+test3 (unsigned int x)
+{
+  if ((0U << x) != 0U
+  || (1U << x) != 16U
+  || (2U << x) != 32U
+  || (3U << x) != 48U
+  || (4U << x) != 64U
+  || (5U << x) != 80U
+  || (6U << x) != 96U
+  || (7U << x) != 112U
+  || (8U << x) != 128U
+  || (9U << x) != 144U
+  || (10U << x) != 160U)
+__builtin_abort ();
+}
+
+void
+test4 (unsigned int x)
+{
+  if (!((0U << x) == 0U
+   || (1U << x) == 8U
+   || (2U << x) == 16U
+   || (3U << x) == 24U
+   || (4U << x) == 32U
+   || (5U << x) == 40U
+   || (6U << x) == 48U
+   || (7U << x) == 56U
+   || (8U << x) == 64U
+   || (9U << x) == 72U
+   || (10U << x) == 80U))
+__builtin_abort ();
+}
+
+int
+main (void)
+{
+  test1 (1);
+  test2 (2);
+  test3 (4U);
+  test4 (3U);
+}
+
+/* { dg-final { scan-tree-dump-not "<<" "original" } } */
+/* { dg-final { cleanup-tree-dump "original" } } */
diff --git gcc/testsuite/gcc.dg/pr66299-2.c gcc/testsuite/gcc.dg/pr66299-2.c
index e69de29..dde0549 100644
--- gcc/testsuite/gcc.dg/pr66299-2.c
+++ gcc/testsuite/gcc.dg/pr66299-2.c
@@ -0,0 +1,34 @@
+/* PR tree-optimization/66299 */
+/* { dg-do run } */
+/* { dg-options "-fdump-tree-optimized -O" } */
+
+void
+test1 (int x, unsigned u)
+{
+  if ((1U << x) != 64
+  || (2 << x) != u
+  || (x << x) != 384
+  || (3 << x) == 9
+  || (x << 14) != 98304U
+  || (1 << x) == 14
+  || (3 << 2) != 12)
+__builtin_abort ();
+}
+
+void
+test2 (int x)
+{
+  unsigned int t = ((unsigned int) 1U << x);
+  if (t != 2U)
+__builtin_abort ();
+}
+
+int
+main (void)
+{
+  test1 (6, 128U);
+  test2 (1);
+}
+
+/* { dg-final { scan-tree-dump-not "<<" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */

Marek


Re: [PATCH, RFC] New memory usage statistics infrastructure

2015-05-28 Thread Martin Liška

On 05/28/2015 01:15 PM, Thomas Schwinge wrote:

Hi!

On Fri, 15 May 2015 16:38:40 +0200, Martin Liška  wrote:

Following patch attempts to rewrite memory reports for GCC's internal 
allocations
[...]


(Got committed to trunk in r223748.)


* hash-map-traits.h: New file.


In that one you added a copyright/licensing header, but...


* mem-stats-traits.h: New file.
* mem-stats.h: New file.


... in these two you didn't (but should):


--- /dev/null
+++ b/gcc/hash-map-traits.h
@@ -0,0 +1,104 @@
+/* A hash map traits.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of [...]



--- /dev/null
+++ b/gcc/mem-stats-traits.h
@@ -0,0 +1,20 @@
+#ifndef GCC_MEM_STATS_TRAITS_H
+#define GCC_MEM_STATS_TRAITS_H
+
+/* Memory allocation origin.  */
+enum mem_alloc_origin
+{
+  HASH_TABLE,
+  HASH_MAP,
+  HASH_SET,
+  VEC,
+  BITMAP,
+  GGC,
+  MEM_ALLOC_ORIGIN_LENGTH
+};
+
+/* Verbose names of the memory allocation origin.  */
+static const char * mem_alloc_origin_names[] = { "Hash tables", "Hash maps", "Hash 
sets",
+  "Heap vectors", "Bitmaps", "GGC memory" };
+
+#endif // GCC_MEM_STATS_TRAITS_H



--- /dev/null
+++ b/gcc/mem-stats.h
@@ -0,0 +1,535 @@
+#ifndef GCC_MEM_STATS_H
+#define GCC_MEM_STATS_H
+
+#include "hash-map-traits.h"
+[...]



Grüße,
  Thomas



Hello.

Thank you for pointing out the missing copyright.
The following patch adds it.

Ready for trunk?
Thanks,
Martin
>From 2abc4116e3a941dbd8e0cff22698b979c9c1c830 Mon Sep 17 00:00:00 2001
From: mliska 
Date: Thu, 28 May 2015 14:24:58 +0200
Subject: [PATCH] Missing copyright for mem-stats header files.

gcc/ChangeLog:

2015-05-28  Martin Liska  

	* mem-stats-traits.h: Add copyright header.
	* mem-stats.h: Likewise.
---
 gcc/mem-stats-traits.h | 20 
 gcc/mem-stats.h| 20 
 2 files changed, 40 insertions(+)

diff --git a/gcc/mem-stats-traits.h b/gcc/mem-stats-traits.h
index de1614e..c5cb84a 100644
--- a/gcc/mem-stats-traits.h
+++ b/gcc/mem-stats-traits.h
@@ -1,3 +1,23 @@
+/* A memory statistics traits.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   Contributed by Martin Liska  
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
 #ifndef GCC_MEM_STATS_TRAITS_H
 #define GCC_MEM_STATS_TRAITS_H
 
diff --git a/gcc/mem-stats.h b/gcc/mem-stats.h
index ac47231..9e3dec0 100644
--- a/gcc/mem-stats.h
+++ b/gcc/mem-stats.h
@@ -1,3 +1,23 @@
+/* A memory statistics tracking infrastructure.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   Contributed by Martin Liska  
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
 #ifndef GCC_MEM_STATS_H
 #define GCC_MEM_STATS_H
 
-- 
2.1.4



Re: [PATCH v2] Handle OS X deployment targets correctly

2015-05-28 Thread Mike Stump
On May 20, 2015, at 2:57 PM, Lawrence Velázquez  wrote:
> 2015-05-15  Lawrence Velázquez  
> 
>   PR target/63810
>   * gcc/config/darwin-c.c (version_components): New global enum.
>   (parse_version, version_as_legacy_macro)
>   (version_as_modern_macro, macosx_version_as_macro): New functions.
>   (version_as_macro): Remove.
>   (darwin_cpp_builtins): Use new function.
>   * gcc/testsuite/gcc.dg/darwin-minversion-3.c: Update testcase.
>   * gcc/testsuite/gcc.dg/darwin-minversion-4.c: Ditto.
>   * gcc/testsuite/gcc.dg/darwin-minversion-5.c: New testcase.
>   * gcc/testsuite/gcc.dg/darwin-minversion-6.c: Ditto.
>   * gcc/testsuite/gcc.dg/darwin-minversion-7.c: Ditto.
>   * gcc/testsuite/gcc.dg/darwin-minversion-8.c: Ditto.
>   * gcc/testsuite/gcc.dg/darwin-minversion-9.c: Ditto.
>   * gcc/testsuite/gcc.dg/darwin-minversion-10.c: Ditto.
>   * gcc/testsuite/gcc.dg/darwin-minversion-11.c: Ditto.
>   * gcc/testsuite/gcc.dg/darwin-minversion-12.c: Ditto.

Committed revision 223808.

Thanks for all your work.

Re: [patch] Make std::string default constructor conditionally noexcept

2015-05-28 Thread Jonathan Wakely

On 13/05/15 14:36 +0100, Jonathan Wakely wrote:

http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/n4383.html#2455

Voted into the WP in Lenexa.

We already did the right thing for vector, so only basic_string needs
to change.

Tested powerpc64le-linux, committed to trunk.


Also committed to gcc-5-branch.


commit 634ed6e2d2ea4d69a29a8907044e6f68541d88aa
Author: Jonathan Wakely 
Date:   Wed May 13 14:21:37 2015 +0100

* include/bits/basic_string.h (basic_string::basic_string()): Make
noexcept conditional on allocator (LWG 2455).

diff --git a/libstdc++-v3/include/bits/basic_string.h 
b/libstdc++-v3/include/bits/basic_string.h
index 3e3eef4..093f502 100644
--- a/libstdc++-v3/include/bits/basic_string.h
+++ b/libstdc++-v3/include/bits/basic_string.h
@@ -377,7 +377,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
  /**
   *  @brief  Default constructor creates an empty string.
   */
-  basic_string() _GLIBCXX_NOEXCEPT
+  basic_string()
+#if __cplusplus >= 201103L
+  noexcept(is_nothrow_default_constructible<_Alloc>::value)
+#endif
  : _M_dataplus(_M_local_data())
  { _M_set_length(0); }





Re: [patch] libstdc++/65352 fix ubsan errors in std::array

2015-05-28 Thread Jonathan Wakely

On 28/05/15 12:53 +0100, Jonathan Wakely wrote:

Unsurprisingly ubsan doesn't like referencing a null pointer.

With this change __array_traits::_S_ref is only used to access an
element, which is invalid for std::array<T, 0> anyway.

Tested powerpc64le-linux, committed to trunk.


And gcc-5-branch.


commit 0d999cf16b8f6a0d9bbf4bfe96b29e7b73a259e4
Author: Jonathan Wakely 
Date:   Thu May 28 12:21:36 2015 +0100

PR libstdc++/65352
* include/std/array (__array_traits::_S_ptr): New function.
(array::data): Use _S_ptr to avoid creating invalid reference.
* testsuite/23_containers/array/tuple_interface/get_neg.cc: Adjust
dg-error line numbers.
* testsuite/23_containers/array/tuple_interface/tuple_element_neg.cc:
likewise.


Re: [patch] Rename template parameter of std::__alloc_rebind

2015-05-28 Thread Jonathan Wakely

On 01/05/15 16:23 +0100, Jonathan Wakely wrote:

The name of this alias template's first parameter is a copy&paste error,
it should be _Alloc. Tested powerpc64le-linux, committed to trunk.


Also committed to gcc-5-branch.


commit a772309ec9fc300e57edd750fa32b8320d68004a
Author: Jonathan Wakely 
Date:   Fri May 1 16:12:16 2015 +0100

* include/bits/alloc_traits.h (__alloc_rebind): Change parameter name.

diff --git a/libstdc++-v3/include/bits/alloc_traits.h 
b/libstdc++-v3/include/bits/alloc_traits.h
index d6c42ec..12c6c12 100644
--- a/libstdc++-v3/include/bits/alloc_traits.h
+++ b/libstdc++-v3/include/bits/alloc_traits.h
@@ -72,8 +72,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  typedef _Alloc<_Tp, _Args...> __type;
};

-  template<typename _Ptr, typename _Tp>
-using __alloc_rebind = typename __alloctr_rebind<_Ptr, _Tp>::__type;
+  template<typename _Alloc, typename _Tp>
+using __alloc_rebind = typename __alloctr_rebind<_Alloc, _Tp>::__type;

  /**
   * @brief  Uniform interface to all allocator types.




Re: [PATCH] Optimize (CST1 << A) == CST2 (PR tree-optimization/66299)

2015-05-28 Thread Jakub Jelinek
On Thu, May 28, 2015 at 02:15:45PM +0200, Marek Polacek wrote:
> This PR points out that we weren't able to optimize 1 << x == 2 to just
> x == 1.  This is my attempt to fix that: if we see (CST1 << A) == CST2
> and CST2 is a multiple of CST1, use log2 to get rid of the shift, but
> only if the result of the shift is a natural number (including zero).

Is CST2 a multiple of CST1 the best test though?
I mean say in
(0x8001U << x) == 0x2U
0x2U isn't a multiple of 0x8001U, yet there is only one
valid value of x for which it holds (17), so we could very well
optimize that to x == 17.
If popcount of the CST1 is 1, then multiple_of_p is supposedly sufficient
(have you checked if CST1 is negative that it still works?), for others
supposedly we could have a helper function that would just try
in a loop all shift counts from 0 to precision - 1, and note when
(CST1 << b) == CST2 - if for no b, then it should fold regardless of
has_single_use to false or true, if for exactly one shift count, then
use a comparison against that shift count, otherwise give up?
Supposedly (CST1 >> A) == CST2 can be handled similarly.

> If CST2 is not a multiple of CST1, then the whole expression can be
> discarded, but I'd like to do that as a follow-up.
> (It would help if our current match.pd grammar allowed us to use "else",
> any plans on doing that?)

Jakub


Re: [PATCH] Optimize (CST1 << A) == CST2 (PR tree-optimization/66299)

2015-05-28 Thread Richard Biener
On Thu, May 28, 2015 at 2:15 PM, Marek Polacek  wrote:
> This PR points out that we weren't able to optimize 1 << x == 2 to just
> x == 1.  This is my attempt to fix that: if we see (CST1 << A) == CST2
> and CST2 is a multiple of CST1, use log2 to get rid of the shift, but
> only if the result of the shift is a natural number (including zero).
>
> If CST2 is not a multiple of CST1, then the whole expression can be
> discarded, but I'd like to do that as a follow-up.
> (It would help if our current match.pd grammar allowed us to use "else",
> any plans on doing that?)
>
> Bootstrapped/regtested on x86_64-linux, ok for trunk?
>
> 2015-05-28  Marek Polacek  
>
> PR tree-optimization/66299
> * match.pd ((CST1 << A) == CST2 -> A == log2 (CST2 / CST1),
> (CST1 << A) != CST2 -> A != log2 (CST2 / CST1)): New
> patterns.
>
> * gcc.dg/pr66299-1.c: New test.
> * gcc.dg/pr66299-2.c: New test.
>
> diff --git gcc/match.pd gcc/match.pd
> index abd7851..5d07a70 100644
> --- gcc/match.pd
> +++ gcc/match.pd
> @@ -676,6 +676,19 @@ along with GCC; see the file COPYING3.  If not see
>(cmp (bit_and (lshift integer_onep @0) integer_onep) integer_zerop)
>(icmp @0 { build_zero_cst (TREE_TYPE (@0)); })))
>
> +/* (CST1 << A) == CST2 -> A == log2 (CST2 / CST1)
> +   (CST1 << A) != CST2 -> A != log2 (CST2 / CST1)
> +   if CST2 is a multiple of CST1.  */
> +(for cmp (ne eq)
> + (simplify
> +  (cmp (lshift@3 INTEGER_CST@0 @1) INTEGER_CST@2)
> +  (if ((TREE_CODE (@3) != SSA_NAME || has_single_use (@3))

I think we have the single_use (@3) helper now.  Not sure why you
restrict this here though - we are only creating new constants.

Ok with dropping the single-use check.

> +   && wi::multiple_of_p (@2, @0, TYPE_SIGN (type)))
> +   (with {
> +int shift = wi::exact_log2 (wi::div_trunc (@2, @0, TYPE_SIGN (type))); }
> +   (if (shift != -1)
> +(cmp @1 { build_int_cst (TREE_TYPE (@1), shift); }))

so with else you mean

(if (shift != -1)
  ...
  (else-expr ...))

?  Sure that's possible.  Today you can write

   (if (shift != -1)
  ...)
   (if (shift == -1)
 ...)

or

   (if (shift != -1)
 )
   (else-expr)

which is equivalent if the "if" is the only one at the nesting level and
the then-expr doesn't contain any further ones.  That is, it is equivalent to

 if () ...;
 else-expr;

thus the fall-thru

So (if ...) would get an optional else-expr, yes, that sounds useful.
I think we
already have some (if A ..) (if !A ..) in match.pd.

Thanks,
Richard.

> +
>  /* Simplifications of conversions.  */
>
>  /* Basic strip-useless-type-conversions / strip_nops.  */
> diff --git gcc/testsuite/gcc.dg/pr66299-1.c gcc/testsuite/gcc.dg/pr66299-1.c
> index e69de29..9d41275 100644
> --- gcc/testsuite/gcc.dg/pr66299-1.c
> +++ gcc/testsuite/gcc.dg/pr66299-1.c
> @@ -0,0 +1,83 @@
> +/* PR tree-optimization/66299 */
> +/* { dg-do run } */
> +/* { dg-options "-fdump-tree-original" } */
> +
> +void
> +test1 (int x)
> +{
> +  if ((0 << x) != 0
> +  || (1 << x) != 2
> +  || (2 << x) != 4
> +  || (3 << x) != 6
> +  || (4 << x) != 8
> +  || (5 << x) != 10
> +  || (6 << x) != 12
> +  || (7 << x) != 14
> +  || (8 << x) != 16
> +  || (9 << x) != 18
> +  || (10 << x) != 20)
> +__builtin_abort ();
> +}
> +
> +void
> +test2 (int x)
> +{
> +  if (!((0 << x) == 0
> +&& (1 << x) == 4
> +&& (2 << x) == 8
> +&& (3 << x) == 12
> +&& (4 << x) == 16
> +&& (5 << x) == 20
> +&& (6 << x) == 24
> +&& (7 << x) == 28
> +&& (8 << x) == 32
> +&& (9 << x) == 36
> +   && (10 << x) == 40))
> +__builtin_abort ();
> +}
> +
> +void
> +test3 (unsigned int x)
> +{
> +  if ((0U << x) != 0U
> +  || (1U << x) != 16U
> +  || (2U << x) != 32U
> +  || (3U << x) != 48U
> +  || (4U << x) != 64U
> +  || (5U << x) != 80U
> +  || (6U << x) != 96U
> +  || (7U << x) != 112U
> +  || (8U << x) != 128U
> +  || (9U << x) != 144U
> +  || (10U << x) != 160U)
> +__builtin_abort ();
> +}
> +
> +void
> +test4 (unsigned int x)
> +{
> +  if (!((0U << x) == 0U
> +   || (1U << x) == 8U
> +   || (2U << x) == 16U
> +   || (3U << x) == 24U
> +   || (4U << x) == 32U
> +   || (5U << x) == 40U
> +   || (6U << x) == 48U
> +   || (7U << x) == 56U
> +   || (8U << x) == 64U
> +   || (9U << x) == 72U
> +   || (10U << x) == 80U))
> +__builtin_abort ();
> +}
> +
> +int
> +main (void)
> +{
> +  test1 (1);
> +  test2 (2);
> +  test3 (4U);
> +  test4 (3U);
> +}
> +
> +/* { dg-final { scan-tree-dump-not "<<" "original" } } */
> +/* { dg-final { cleanup-tree-dump "original" } } */
> diff --git gcc/testsuite/gcc.dg/pr66299-2.c gcc/testsuite/gcc.dg/pr66299-2.c
> index e69de29..dde0549 100644
> --- gcc/testsuite/gcc.dg/pr66299-2.c
> +++ gcc/testsuite/gcc.dg/pr66299-2.c
> @@ -0,0 +1,34 @@
> +/*

Re: [PATCH 01/35] Introduce new type-based pool allocator.

2015-05-28 Thread Martin Liška

On 05/27/2015 07:44 PM, Jeff Law wrote:

On 05/27/2015 07:56 AM, mliska wrote:

Hello.

The following patch set attempts to replace the old-style pool allocator
with a type-based one. Moreover, as we utilize classes and structs that are used
just by a pool allocator, these types have overwritten ctors and dtors.
Thus, using the allocator is much easier and we shouldn't cast types
back and forth. Another benefit can be achieved in the future, as we will
be able to call a class constructor to correctly register the location
where memory is allocated (-fgather-detailed-mem-stats).

The patch bootstraps on x86_64-linux-gnu and ppc64-linux-gnu and
survives regression tests on x86_64-linux-gnu.

Ready for trunk?
Thanks,
Martin

gcc/ChangeLog:

2015-04-30  Martin Liska  

* alloc-pool.c (struct alloc_pool_descriptor): Move definition
to header file.
* alloc-pool.h (pool_allocator::pool_allocator): New function.
(pool_allocator::release): Likewise.
(inline pool_allocator::release_if_empty): Likewise.
(inline pool_allocator::~pool_allocator): Likewise.
(pool_allocator::allocate): Likewise.
(pool_allocator::remove): Likewise.

So on a general note, I don't like changing the size of the structure based on 
ENABLE_CHECKING.  If we've got other cases where we do this, then I guess it's 
OK, but if not, I'd prefer not to start doing so.


Hello.

This mechanism has just been adapted. I find it quite useful, as we have
examples in the source code where we
allocate the same struct/class types from various pools. For debugging purposes, it
helps to identify whether
a release operation is called for the correct pool.





---



+
+  /* Align X to 8.  */
+  size_t align_eight (size_t x)
+  {
+return (((x+7) >> 3) << 3);
+  }
+
+  const char *m_name;
+#ifdef ENABLE_CHECKING
+  ALLOC_POOL_ID_TYPE m_id;
+#endif
+  size_t m_elts_per_block;
+
+  /* These are the elements that have been allocated at least once and freed.  
*/
+  allocation_pool_list *m_returned_free_list;
+
+  /* These are the elements that have not yet been allocated out of
+ the last block obtained from XNEWVEC.  */
+  char* m_virgin_free_list;
+
+  /* The number of elements in the virgin_free_list that can be
+ allocated before needing another block.  */
+  size_t m_virgin_elts_remaining;
+  size_t m_elts_allocated;
+  size_t m_elts_free;
+  size_t m_blocks_allocated;
+  allocation_pool_list *m_block_list;
+  size_t m_block_size;
+  size_t m_elt_size;

Several fields aren't documented.  They're largely self-explanatory, so I won't 
insist you document those trailing fields.  Your call whether or not to add 
docs for them.


Ok, even though they are self-explanatory, I'm going to document these fields.





+
+  /* Now align the size to a multiple of 4.  */
+  size = align_eight (size);

Why not just aligned to 4, rather than a multiple of 4?  Presumably the extra 4 
bytes don't matter in practice?


Also adapted constant, hope it's chosen as the best.




+
+template 
+void
+inline pool_allocator::release_if_empty ()
+{
+  if (m_elts_free == m_elts_allocated)
+release ();
+}

Is the release_if_empty all that useful in practice?


Yes, 02/x uses that feature.



So the big issue in my mind continues to be the additional element in the 
structure when ENABLE_CHECKING is on.  As mentioned earlier, if we're already 
doing this elsewhere, then I won't object.  If we aren't, then I don't want to 
start doing so now.

The rest of the stuff are just minor questions, but nothing which would in my 
mind stop this from going forward.

Presumably your testing was with the whole series and they can't go in 
piecemeal, right?


Right, regression tests were run just once for the whole series, but I've 
tested that every individual patch can be applied and the compiler can be 
successfully built.
Anyway, I would like to commit all these patches at once (one by one).
Thus, I'm going to wait for approval for the whole series before I'll commit 
the set.

Thanks,
Martin




jeff




[Ada] Avoid use of secondary stack

2015-05-28 Thread Arnaud Charlet
This patch avoids the use of the secondary stack, and the corresponding cleanup
handlers, in many cases. For example, access discriminants no longer force
functions to return on the secondary stack. This is a speed improvement.
It is particularly relevant to the Ada.Containers.

Tested on x86_64-pc-linux-gnu, committed on trunk

2015-05-28  Bob Duff  

* sem_util.adb (Requires_Transient_Scope): Avoid returning
function results on the secondary stack in so many cases.

Index: sem_util.adb
===
--- sem_util.adb(revision 223813)
+++ sem_util.adb(working copy)
@@ -16951,14 +16951,50 @@
--
 
--  A transient scope is required when variable-sized temporaries are
-   --  allocated in the primary or secondary stack, or when finalization
-   --  actions must be generated before the next instruction.
+   --  allocated on the secondary stack, or when finalization actions must be
+   --  generated before the next instruction.
 
+   function Old_Requires_Transient_Scope (Id : Entity_Id) return Boolean;
+   function New_Requires_Transient_Scope (Id : Entity_Id) return Boolean;
+   --  ???We retain the old and new algorithms for Requires_Transient_Scope for
+   --  the time being. New_Requires_Transient_Scope is used by default; the
+   --  debug switch -gnatdQ can be used to do Old_Requires_Transient_Scope
+   --  instead. The intent is to use this temporarily to measure before/after
+   --  efficiency. Note: when this temporary code is removed, the documentation
+   --  of dQ in debug.adb should be removed.
+
function Requires_Transient_Scope (Id : Entity_Id) return Boolean is
+  Old_Result : constant Boolean := Old_Requires_Transient_Scope (Id);
+
+   begin
+  if Debug_Flag_QQ then
+ return Old_Result;
+  end if;
+
+  declare
+ New_Result : constant Boolean := New_Requires_Transient_Scope (Id);
+
+  begin
+ --  Assert that we're not putting things on the secondary stack if we
+ --  didn't before; we are trying to AVOID secondary stack when
+ --  possible.
+
+ if not Old_Result then
+pragma Assert (not New_Result);
+null;
+ end if;
+
+ return New_Result;
+  end;
+   end Requires_Transient_Scope;
+
+   --
+   -- Old_Requires_Transient_Scope --
+   --
+
+   function Old_Requires_Transient_Scope (Id : Entity_Id) return Boolean is
   Typ : constant Entity_Id := Underlying_Type (Id);
 
-   --  Start of processing for Requires_Transient_Scope
-
begin
   --  This is a private type which is not completed yet. This can only
   --  happen in a default expression (of a formal parameter or of a
@@ -16989,9 +17025,7 @@
   --  returned value is allocated on the secondary stack. Controlled
   --  type temporaries need finalization.
 
-  elsif Is_Tagged_Type (Typ)
-or else Has_Controlled_Component (Typ)
-  then
+  elsif Is_Tagged_Type (Typ) or else Has_Controlled_Component (Typ) then
  return not Is_Value_Type (Typ);
 
   --  Record type
@@ -16999,18 +17033,20 @@
   elsif Is_Record_Type (Typ) then
  declare
 Comp : Entity_Id;
+
  begin
 Comp := First_Entity (Typ);
 while Present (Comp) loop
if Ekind (Comp) = E_Component then
+
   --  ???It's not clear we need a full recursive call to
-  --  Requires_Transient_Scope here. Note that the following
-  --  can't happen.
+  --  Old_Requires_Transient_Scope here. Note that the
+  --  following can't happen.
 
   pragma Assert (Is_Definite_Subtype (Etype (Comp)));
   pragma Assert (not Has_Controlled_Component (Etype (Comp)));
 
-  if Requires_Transient_Scope (Etype (Comp)) then
+  if Old_Requires_Transient_Scope (Etype (Comp)) then
  return True;
   end if;
end if;
@@ -17033,7 +17069,7 @@
 
  --  If component type requires a transient scope, the array does too
 
- if Requires_Transient_Scope (Component_Type (Typ)) then
+ if Old_Requires_Transient_Scope (Component_Type (Typ)) then
 return True;
 
  --  Otherwise, we only need a transient scope if the size depends on
@@ -17049,8 +17085,133 @@
  pragma Assert (Is_Protected_Type (Typ) or else Is_Task_Type (Typ));
  return False;
   end if;
-   end Requires_Transient_Scope;
+   end Old_Requires_Transient_Scope;
 
+   --
+   -- New_Requires_Transient_Scope --
+   --
+
+   function New_Requires_Transient_Scope (Id : Entity_Id) return Boolean is
+
+  function Caller_Known_Size_Record (Typ : Entity_

RE: [Patch MIPS] Enable TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS hook

2015-05-28 Thread Robert Suchanek
Hi Matthew,

> > +
> > +/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.  */
> > +
> > +static reg_class_t
> > +mips_ira_change_pseudo_allocno_class (int regno, reg_class_t
> > +allocno_class) {
> > +  if (FLOAT_MODE_P (PSEUDO_REGNO_MODE (regno)) || allocno_class !=
> > ALL_REGS)
> > +return allocno_class;
> > +  return GR_REGS;
> > +}
> > +
> 
> I'm concerned that this may not be the right condition but either way,
> I think it is better to switch this around to have the special case
> as the conditional. I found it difficult to understand what it is
> doing even when I know the intent :-) A comment about the purpose seems
> appropriate too here as it won't be obvious to someone new.

I tried to write a sensible comment and found the original change hard
to describe.  I changed the condition to the special case and did some
experiments.  The patch below is now more concise, better fits the purpose, and
it seems to have marginally better allocation too.

> Aren't there some fixed point modes that should go in FPRs too? I guess
> paired single (v2sf) doesn't need mentioning as it would never be
> allowed in GR_REGS so pseudos of that mode would never get ALL_REGS,
> is that correct? I.e. will we only see ALL_REGS if a particular
> pseudo/mode truly can be placed in any register according to the
> hard_regno_ok rules?

I think that with the patch below all concerns would be addressed since
the class narrowing would be constrained to integers rather than anything else.

Regards,
Robert 

diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index c3755f5..976f844 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -19415,6 +19415,21 @@ mips_lra_p (void)
 {
   return mips_lra_flag;
 }
+
+/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.  */
+
+static reg_class_t
+mips_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class)
+{
+  /* LRA will generate unnecessary reloads because the LRA's cost pass finds
+ cheaper to move data to/from memory into FP regs rather than GP regs.
+ By narrowing the class for allocnos to GR_REGS for integral modes early,
+ we refrain from using FP regs until they are absolutely necessary.  */
+  if (INTEGRAL_MODE_P (PSEUDO_REGNO_MODE (regno)) && allocno_class == ALL_REGS)
+return GR_REGS;
+  return allocno_class;
+}
+
 

 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
@@ -19671,6 +19686,8 @@ mips_lra_p (void)
 #define TARGET_SPILL_CLASS mips_spill_class
 #undef TARGET_LRA_P
 #define TARGET_LRA_P mips_lra_p
+#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
+#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS 
mips_ira_change_pseudo_allocno_class
 
 struct gcc_target targetm = TARGET_INITIALIZER;
 



Re: acc_on_device for device_type_host_nonshm

2015-05-28 Thread Julian Brown
On Thu, 28 May 2015 04:48:58 -0700
"H.J. Lu"  wrote:

> On Thu, May 21, 2015 at 4:10 AM, Jakub Jelinek 
> wrote:
> > On Thu, May 21, 2015 at 01:02:12PM +0200, Thomas Schwinge wrote:
> >> Hi!
> >>
> >> On Thu, 7 May 2015 19:32:26 +0100, Julian Brown
> >>  wrote:
> >> > Here's a new version of the patch [...]
> >>
> >> > OK for trunk?
> >>
> >> Makes sense to me (with just a request to drop the testsuite
> >> changes, see below), to get the existing regressions under
> >> control.  Jakub?
> >
> > Ok for trunk.
> >>
> >> > PR libgomp/65742
> >> >
> >> > gcc/
> >> > * builtins.c (expand_builtin_acc_on_device): Don't use
> >> > open-coded sequence for !ACCEL_COMPILER.
> >> >
> 
> It breaks bootstrap on x86:
> 
> https://gcc.gnu.org/ml/gcc-regression/2015-05/msg00389.html
> 
> I checked in this to fix it.

Apologies, and thanks!

Julian


[Ada] Avoid use of secondary stack

2015-05-28 Thread Arnaud Charlet
This patch avoids the use of the secondary stack, and the corresponding cleanup
handlers, in many cases. For example, access discriminants no longer force
functions to return on the secondary stack. This is a speed improvement.
It is particularly relevant to the Ada.Containers.

Tested on x86_64-pc-linux-gnu, committed on trunk

2015-05-28  Bob Duff  

* sem_util.adb (Requires_Transient_Scope): For definite untagged
subtypes, we should never have to use the secondary stack. This moves
toward that goal. But there are still cases that don't work.
Here, we move the check for Is_Definite first, but add a
special-purpose check for Has_Discrim_Dep_Array.

Index: sem_util.adb
===
--- sem_util.adb(revision 223814)
+++ sem_util.adb(working copy)
@@ -17103,6 +17103,11 @@
   --  could be nested inside some other record that is constrained by
   --  nondiscriminants). That is, the recursive calls are too conservative.
 
+  function Has_Discrim_Dep_Array (Typ : Entity_Id) return Boolean;
+  --  True if we find certain discriminant-dependent array
+  --  subcomponents. This shouldn't be necessary, but without this check,
+  --  we crash in gimplify. ???
+
   function Caller_Known_Size_Record (Typ : Entity_Id) return Boolean is
  pragma Assert (Typ = Underlying_Type (Typ));
 
@@ -17150,8 +17155,50 @@
  return True;
   end Caller_Known_Size_Record;
 
-  --  Local deeclarations
+  function Has_Discrim_Dep_Array (Typ : Entity_Id) return Boolean is
+ pragma Assert (Typ = Underlying_Type (Typ));
 
+  begin
+ if Is_Array_Type (Typ) then
+return Size_Depends_On_Discriminant (Typ);
+ end if;
+
+ if Is_Record_Type (Typ)
+   or else
+   Is_Protected_Type (Typ)
+ then
+declare
+   Comp : Entity_Id := First_Entity (Typ);
+
+begin
+   while Present (Comp) loop
+
+  --  Only look at E_Component entities. No need to look at
+  --  E_Discriminant entities, and we must ignore internal
+  --  subtypes generated for constrained components.
+
+  if Ekind (Comp) = E_Component then
+ declare
+Comp_Type : constant Entity_Id :=
+  Underlying_Type (Etype (Comp));
+
+ begin
+if Has_Discrim_Dep_Array (Comp_Type) then
+   return True;
+end if;
+ end;
+  end if;
+
+  Next_Entity (Comp);
+   end loop;
+end;
+ end if;
+
+ return False;
+  end Has_Discrim_Dep_Array;
+
+  --  Local declarations
+
   Typ : constant Entity_Id := Underlying_Type (Id);
 
--  Start of processing for New_Requires_Transient_Scope
@@ -17184,26 +17231,26 @@
   elsif Is_Tagged_Type (Typ) or else Has_Controlled_Component (Typ) then
  return not Is_Value_Type (Typ);
 
-  --  Indefinite (discriminated) untagged record or protected type
-
-  elsif Is_Record_Type (Typ) or else Is_Protected_Type (Typ) then
- return not Caller_Known_Size_Record (Typ);
- --  ???Should come after Is_Definite_Subtype below
-
   --  Untagged definite subtypes are known size. This includes all
   --  elementary [sub]types. Tasks are known size even if they have
   --  discriminants.
 
   elsif Is_Definite_Subtype (Typ) or else Is_Task_Type (Typ) then
- if Is_Array_Type (Typ) -- ???Shouldn't be necessary
-   and then New_Requires_Transient_Scope
-  (Underlying_Type (Component_Type (Typ)))
- then
-return True;
+ if Is_Record_Type (Typ) or else Is_Protected_Type (Typ) then
+if not Has_Discriminants (Typ) then
+   if Has_Discrim_Dep_Array (Typ) then
+  return True; -- ???Shouldn't be necessary
+   end if;
+end if;
  end if;
 
  return False;
 
+  --  Indefinite (discriminated) untagged record or protected type
+
+  elsif Is_Record_Type (Typ) or else Is_Protected_Type (Typ) then
+ return not Caller_Known_Size_Record (Typ);
+
   --  Unconstrained array
 
   else


Re: [patch] libstdc++/65352 fix ubsan errors in std::array

2015-05-28 Thread Marc Glisse

On Thu, 28 May 2015, Jonathan Wakely wrote:


Unsurprisingly ubsan doesn't like referencing a null pointer.

With this change __array_traits<T, 0>::_S_ref is only used to access an
element, which is invalid for std::array<T, 0> anyway.


Should

return *static_cast<_Tp*>(nullptr);

be replaced with

__builtin_unreachable();

then? It seems strange to keep an implementation that is never supposed to 
be used.


--
Marc Glisse


[PATCH][5/n] Reduction vectorization improvements

2015-05-28 Thread Richard Biener

The following patch fixes loop vectorization of SLP reduction chains
that involve patterns.  The issue here is that pattern recog runs
after reduction detection and this screws things up.  Re-ordering
this created interesting side-effects so I didn't explore this
further (for now) but instead fix the detected reduction chains
after pattern recog.  This of course just reveals multiple places
where things go wrong with this setting, fixed with the following
patch which finally vectorizes one of the hottest loop nests
in a soon-to-be-popular x264 encoder/decoder.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2015-05-28  Richard Biener  

* tree-vect-loop.c (vect_fixup_reduc_chain): New function.
(vect_fixup_scalar_cycles_with_patterns): Likewise.
(vect_analyze_loop_2): Call vect_fixup_scalar_cycles_with_patterns
after pattern recog.
(vect_create_epilog_for_reduction): Properly handle reductions
with patterns.
(vectorizable_reduction): Likewise.
* tree-vect-slp.c (vect_analyze_slp_instance): Properly mark
reduction chains.
(vect_get_constant_vectors): Create the correct number of
initial values for reductions.
(vect_schedule_slp_instance): Handle reduction chains that are
type changing properly.
* tree-vect-stmts.c (vect_analyze_stmt): Adjust.

* gcc.dg/vect/slp-reduc-sad.c: New testcase.

Index: gcc/tree-vect-loop.c
===
--- gcc/tree-vect-loop.c(revision 223814)
+++ gcc/tree-vect-loop.c(working copy)
@@ -828,6 +828,45 @@ vect_analyze_scalar_cycles (loop_vec_inf
 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
 }
 
+/* Transfer group and reduction information from STMT to its pattern stmt.  */
+
+static void
+vect_fixup_reduc_chain (gimple stmt)
+{
+  gimple firstp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
+  gimple stmtp;
+  gcc_assert (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (firstp))
+ && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));
+  GROUP_SIZE (vinfo_for_stmt (firstp)) = GROUP_SIZE (vinfo_for_stmt (stmt));
+  do
+{
+  stmtp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
+  GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmtp)) = firstp;
+  stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
+  if (stmt)
+   GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmtp))
+ = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt));
+}
+  while (stmt);
+  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmtp)) = vect_reduction_def;
+}
+
+/* Fixup scalar cycles that now have their stmts detected as patterns.  */
+
+static void
+vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
+{
+  gimple first;
+  unsigned i;
+
+  FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
+if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first)))
+  {
+   vect_fixup_reduc_chain (first);
+   LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
+ = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first));
+  }
+}
 
 /* Function vect_get_loop_niters.
 
@@ -1708,6 +1747,8 @@ vect_analyze_loop_2 (loop_vec_info loop_
 
   vect_pattern_recog (loop_vinfo, NULL);
 
+  vect_fixup_scalar_cycles_with_patterns (loop_vinfo);
+
   /* Analyze the access patterns of the data-refs in the loop (consecutive,
  complex, etc.). FORNOW: Only handle consecutive access pattern.  */
 
@@ -4573,8 +4614,12 @@ vect_finalize_reduction:
  exit phi node.  */
   if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
 {
-  scalar_dest = gimple_assign_lhs (
-   SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]);
+  gimple dest_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
+  /* Handle reduction patterns.  */
+  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt)))
+   dest_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt));
+
+  scalar_dest = gimple_assign_lhs (dest_stmt);
   group_size = 1;
 }
 
@@ -4875,12 +4920,17 @@ vectorizable_reduction (gimple stmt, gim
   auto_vec phis;
   int vec_num;
   tree def0, def1, tem, op0, op1 = NULL_TREE;
+  bool first_p = true;
 
   /* In case of reduction chain we switch to the first stmt in the chain, but
  we don't update STMT_INFO, since only the last stmt is marked as reduction
  and has reduction properties.  */
-  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
-stmt = GROUP_FIRST_ELEMENT (stmt_info);
+  if (GROUP_FIRST_ELEMENT (stmt_info)
+  && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
+{
+  stmt = GROUP_FIRST_ELEMENT (stmt_info);
+  first_p = false;
+}
 
   if (nested_in_vect_loop_p (loop, stmt))
 {
@@ -4903,8 +4953,8 @@ vectorizable_reduction (gimple stmt, gim
 return false;
 
   /* Make sure it was already recognized as a reduction computation.  */
-  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
-   

Re: Do less generous pointer globbing in alias.c

2015-05-28 Thread Jan Hubicka
Hi,
here is an updated version of the patch.  It makes alias_set_subset_of symmetric
for ptr_type_node and other pointer types, and moves the logic of creating subsets
to get_alias_set.

I tested that perlbmk works when built at -O3 x86_64

Bootstrapped/regtested x86_64-linux, OK?

Honza

* alias.c (alias_set_entry_d): Add is_pointer.
(alias_set_subset_of): Special case pointers.
(init_alias_set_entry): Break out from ...
(record_alias_subset): ... here.
(get_alias_set): Do less generous pointer globbing.
* gcc.dg/alias-8.c: Do not xfail.
* gcc.dg/pr62167.c: Prevent FRE.
Index: alias.c
===
--- alias.c (revision 223772)
+++ alias.c (working copy)
@@ -183,10 +184,6 @@ struct GTY(()) alias_set_entry_d {
   /* The alias set number, as stored in MEM_ALIAS_SET.  */
   alias_set_type alias_set;
 
-  /* Nonzero if would have a child of zero: this effectively makes this
- alias set the same as alias set zero.  */
-  int has_zero_child;
-
   /* The children of the alias set.  These are not just the immediate
  children, but, in fact, all descendants.  So, if we have:
 
@@ -195,6 +192,15 @@ struct GTY(()) alias_set_entry_d {
  continuing our example above, the children here will be all of
  `int', `double', `float', and `struct S'.  */
   hash_map *children;
+
+  /* Nonzero if would have a child of zero: this effectively makes this
+ alias set the same as alias set zero.  */
+  bool has_zero_child;
+  /* Nonzero if alias set corresponds to pointer type itself (i.e. not to
+ aggregate contaiing pointer.
+ This is used for a special case where we need an universal pointer type
+ compatible with all other pointer types.  */
+  bool is_pointer;
 };
 typedef struct alias_set_entry_d *alias_set_entry;
 
@@ -460,12 +466,33 @@ alias_set_subset_of (alias_set_type set1
   if (set2 == 0)
 return true;
 
-  /* Otherwise, check if set1 is a subset of set2.  */
+  /* Check if set1 is a subset of set2.  */
   ase = get_alias_set_entry (set2);
   if (ase != 0
   && (ase->has_zero_child
  || ase->children->get (set1)))
 return true;
+
+  /* As a special case we consider alias set of "void *" to be both subset
+ and superset of every alias set of a pointer.  This extra symmetry does
+ not matter for alias_sets_conflict_p but it makes 
aliasing_component_refs_p
+ to return true on the following testcase:
+
+ void *ptr;
+ char **ptr2=(char **)&ptr;
+ *ptr2 = ...
+
+ This makes void * truly universal pointer type.  See pointer handling in
+ get_alias_set for more details.  */
+  if (ase && ase->is_pointer)
+{
+  alias_set_entry ase1 = get_alias_set_entry (set1);
+
+  if (ase1 && ase1->is_pointer
+ && (set1 == TYPE_ALIAS_SET (ptr_type_node)
+ || set2 == TYPE_ALIAS_SET (ptr_type_node)))
+   return true;
+}
   return false;
 }
 
@@ -764,6 +791,21 @@ alias_ptr_types_compatible_p (tree t1, t
  == TYPE_MAIN_VARIANT (TREE_TYPE (t2)));
 }
 
+/* Create emptry alias set entry.  */
+
+alias_set_entry
+init_alias_set_entry (alias_set_type set)
+{
+  alias_set_entry ase = ggc_cleared_alloc ();
+  ase->alias_set = set;
+  ase->children
+= hash_map::create_ggc (64);
+  ase->has_zero_child = 0;
+  gcc_checking_assert (!get_alias_set_entry (set));
+  (*alias_sets)[set] = ase;
+  return ase;
+}
+
 /* Return the alias set for T, which may be either a type or an
expression.  Call language-specific routine for help, if needed.  */
 
@@ -903,35 +945,92 @@ get_alias_set (tree t)
  the pointed-to types.  This issue has been reported to the
  C++ committee.
 
- In addition to the above canonicalization issue, with LTO
- we should also canonicalize `T (*)[]' to `T *' avoiding
- alias issues with pointer-to element types and pointer-to
- array types.
-
- Likewise we need to deal with the situation of incomplete
- pointed-to types and make `*(struct X **)&a' and
- `*(struct X {} **)&a' alias.  Otherwise we will have to
- guarantee that all pointer-to incomplete type variants
- will be replaced by pointer-to complete type variants if
- they are available.
-
- With LTO the convenient situation of using `void *' to
- access and store any pointer type will also become
- more apparent (and `void *' is just another pointer-to
- incomplete type).  Assigning alias-set zero to `void *'
- and all pointer-to incomplete types is a not appealing
- solution.  Assigning an effective alias-set zero only
- affecting pointers might be - by recording proper subset
- relationships of all pointer alias-sets.
-
- Pointer-to function types are another grey area which
- needs caution.  Globbing them all into one alias-set
- or the above effective zero set would work.
-
- For now just assign the same alias-set to all pointers.
- 

Re: [patch] libstdc++/65352 fix ubsan errors in std::array

2015-05-28 Thread Jonathan Wakely

On 28/05/15 14:38 +0100, Jonathan Wakely wrote:

On 28/05/15 15:26 +0200, Marc Glisse wrote:

On Thu, 28 May 2015, Jonathan Wakely wrote:


Unsurprisingly ubsan doesn't like referencing a null pointer.

With this change __array_traits<T, 0>::_S_ref is only used to access an
element, which is invalid for std::array<T, 0> anyway.


Should

return *static_cast<_Tp*>(nullptr);

be replaced with

__builtin_unreachable();

then? It seems strange to keep an implementation that is never 
supposed to be used.


That's a good idea, I experimented with just not defining it but that
fails for explicit instantiations of array<T, 0>.


Would there be a danger of an object compiled with gcc-5.1 that calls
array<T, 0>::data() finding the _S_ref from an object compiled with
gcc-5.2 and hitting the __builtin_unreachable in valid code?



Re: [patch] libstdc++/65352 fix ubsan errors in std::array

2015-05-28 Thread Jonathan Wakely

On 28/05/15 15:26 +0200, Marc Glisse wrote:

On Thu, 28 May 2015, Jonathan Wakely wrote:


Unsurprisingly ubsan doesn't like referencing a null pointer.

With this change __array_traits<T, 0>::_S_ref is only used to access an
element, which is invalid for std::array<T, 0> anyway.


Should

return *static_cast<_Tp*>(nullptr);

be replaced with

__builtin_unreachable();

then? It seems strange to keep an implementation that is never 
supposed to be used.


That's a good idea, I experimented with just not defining it but that
fails for explicit instantiations of array.



Re: [PATCH 13/14][ARM/AArch64 testsuite] Use gcc-dg-runtest in advsimd-intrinsics.exp

2015-05-28 Thread Alan Lawrence

Christophe Lyon wrote:


So in fact, except for the comment about '-w', it seems your initial
patch was mostly OK, right?




Well, my removing a bunch of that c-torture-init stuff, was what was causing the 
"-Og -g" variant to go missing, but apart from that, yes.


--Alan



PATCH: Mention --enable-default-pie in gcc-6/changes.html

2015-05-28 Thread H.J. Lu
OK to install?

H.J.
---
Index: gcc-6/changes.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-6/changes.html,v
retrieving revision 1.10
diff -u -p -r1.10 changes.html
--- gcc-6/changes.html  26 May 2015 10:12:08 -  1.10
+++ gcc-6/changes.html  28 May 2015 13:49:00 -
@@ -140,8 +140,12 @@ enum {
 
 
 
-
+Other significant improvements
 
+  
+Added --enable-default-pie configure option to
+   generate PIE by default.
+  
 
 
 


Re: [patch] libstdc++/65352 fix ubsan errors in std::array

2015-05-28 Thread Marc Glisse

On Thu, 28 May 2015, Jonathan Wakely wrote:


Would there be a danger of an object compiled with gcc-5.1 that calls
array::data() finding the _S_ref from an object compiled with
gcc-5.2 and hitting the __builtin_unreachable in valid code?


At -O0, maybe. To be safe you would need to give this _S_ref an arbitrary 
abi_tag. You could also replace all uses of _S_ref with *_S_ptr.


--
Marc Glisse


Re: Do less generous pointer globbing in alias.c

2015-05-28 Thread Richard Biener
On Thu, 28 May 2015, Jan Hubicka wrote:

> Hi,
> here is updated version of patch.  It makes alias_set_subset_of to be 
> symmetric for 
> ptr_type_node and other pointer type and moves the logic of creating subsets
> to get_alias_set.
> 
> I tested that perlbmk works when built at -O3 x86_64
> 
> Bootstrapped/regtested x86_64-linux, OK?
> 
> Honza
> 
>   * alias.c (alias_set_entry_d): Add is_pointer.
>   (alias_set_subset_of): Special case pointers.
>   (init_alias_set_entry): Break out from ...
>   (record_alias_subset): ... here.
>   (get_alias_set): Do less generous pointer globbing.
>   * gcc.dg/alias-8.c: Do not xfail.
>   * gcc.dg/pr62167.c: Prevent FRE.
> Index: alias.c
> ===
> --- alias.c   (revision 223772)
> +++ alias.c   (working copy)
> @@ -183,10 +184,6 @@ struct GTY(()) alias_set_entry_d {
>/* The alias set number, as stored in MEM_ALIAS_SET.  */
>alias_set_type alias_set;
>  
> -  /* Nonzero if would have a child of zero: this effectively makes this
> - alias set the same as alias set zero.  */
> -  int has_zero_child;
> -
>/* The children of the alias set.  These are not just the immediate
>   children, but, in fact, all descendants.  So, if we have:
>  
> @@ -195,6 +192,15 @@ struct GTY(()) alias_set_entry_d {
>   continuing our example above, the children here will be all of
>   `int', `double', `float', and `struct S'.  */
>hash_map *children;
> +
> +  /* Nonzero if would have a child of zero: this effectively makes this
> + alias set the same as alias set zero.  */
> +  bool has_zero_child;
> +  /* Nonzero if alias set corresponds to pointer type itself (i.e. not to
> + aggregate contaiing pointer.
> + This is used for a special case where we need an universal pointer type
> + compatible with all other pointer types.  */
> +  bool is_pointer;
>  };
>  typedef struct alias_set_entry_d *alias_set_entry;
>  
> @@ -460,12 +466,33 @@ alias_set_subset_of (alias_set_type set1
>if (set2 == 0)
>  return true;
>  
> -  /* Otherwise, check if set1 is a subset of set2.  */
> +  /* Check if set1 is a subset of set2.  */
>ase = get_alias_set_entry (set2);
>if (ase != 0
>&& (ase->has_zero_child
> || ase->children->get (set1)))
>  return true;
> +
> +  /* As a special case we consider alias set of "void *" to be both subset
> + and superset of every alias set of a pointer.  This extra symmetry does
> + not matter for alias_sets_conflict_p but it makes 
> aliasing_component_refs_p
> + to return true on the following testcase:
> +
> + void *ptr;
> + char **ptr2=(char **)&ptr;
> + *ptr2 = ...
> +
> + This makes void * truly universal pointer type.  See pointer handling in
> + get_alias_set for more details.  */
> +  if (ase && ase->is_pointer)
> +{
> +  alias_set_entry ase1 = get_alias_set_entry (set1);
> +
> +  if (ase1 && ase1->is_pointer
> +   && (set1 == TYPE_ALIAS_SET (ptr_type_node)
> +   || set2 == TYPE_ALIAS_SET (ptr_type_node)))
> + return true;
> +}
>return false;
>  }
>  
> @@ -764,6 +791,21 @@ alias_ptr_types_compatible_p (tree t1, t
> == TYPE_MAIN_VARIANT (TREE_TYPE (t2)));
>  }
>  
> +/* Create emptry alias set entry.  */
> +
> +alias_set_entry
> +init_alias_set_entry (alias_set_type set)
> +{
> +  alias_set_entry ase = ggc_cleared_alloc ();

no need to use cleared_alloc if you also init ->is_pointer to false.

> +  ase->alias_set = set;
> +  ase->children
> += hash_map::create_ggc (64);

that seems a bit excessive, esp. for pointers which won't end
up with any children?  So better make children lazily allocated
in record_alias_subset.

> +  ase->has_zero_child = 0;
> +  gcc_checking_assert (!get_alias_set_entry (set));
> +  (*alias_sets)[set] = ase;
> +  return ase;
> +}
> +
>  /* Return the alias set for T, which may be either a type or an
> expression.  Call language-specific routine for help, if needed.  */
>  
> @@ -903,35 +945,92 @@ get_alias_set (tree t)
>   the pointed-to types.  This issue has been reported to the
>   C++ committee.
>  
> - In addition to the above canonicalization issue, with LTO
> - we should also canonicalize `T (*)[]' to `T *' avoiding
> - alias issues with pointer-to element types and pointer-to
> - array types.
> -
> - Likewise we need to deal with the situation of incomplete
> - pointed-to types and make `*(struct X **)&a' and
> - `*(struct X {} **)&a' alias.  Otherwise we will have to
> - guarantee that all pointer-to incomplete type variants
> - will be replaced by pointer-to complete type variants if
> - they are available.
> -
> - With LTO the convenient situation of using `void *' to
> - access and store any pointer type will also become
> - more apparent (and `void *' is just another pointer-to
> - incomplete type).  Assigning alias-s

Re: backport the fixes of PR target/64011 and /61749 to 4.9 gcc

2015-05-28 Thread James Greenhalgh
On Wed, May 27, 2015 at 03:49:24AM +0100, weixiangyu wrote:
> Hi,

Hi,

> The first patch backports the fix of PR
> target/64011(https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64011) to the 4.9
> branch from trunk r219717,

I can't approve this patch to be backported, so please do not commit it
without approval from the appropriate maintainer.

> and the second patch backports the fix of PR
> target/61749(https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61749) to the 4.9
> branch from trunk r215046.

This second patch is OK to backport to 4.9. It is a harmless enough
patch which fixes an ICE.

Thanks,
James

> And the second one:  
> ===
> --- gcc/ChangeLog-HCC   (revision 130589)
> +++ gcc/ChangeLog-HCC   (revision 130590)
> @@ -1,3 +1,29 @@
> +2015-05-26 y00166676  
> +
> +   Backport from trunk r215046.
> +   2014-09-09  Kyrylo Tkachov  
> +
> +   PR target/61749
> +   * config/aarch64/aarch64-builtins.c (aarch64_types_quadop_qualifiers):
> +   Use qualifier_immediate for last operand.  Rename to...
> +   (aarch64_types_ternop_lane_qualifiers): ... This.
> +   (TYPES_QUADOP): Rename to...
> +   (TYPES_TERNOP_LANE): ... This.
> +   (aarch64_simd_expand_args): Return const0_rtx when encountering user
> +   error.  Change return of 0 to return of NULL_RTX.
> +   (aarch64_crc32_expand_builtin): Likewise.
> +   (aarch64_expand_builtin): Return NULL_RTX instead of 0.
> +   ICE when expanding unknown builtin.
> +   * config/aarch64/aarch64-simd-builtins.def (sqdmlal_lane): Use
> +   TERNOP_LANE qualifiers.
> +   (sqdmlsl_lane): Likewise.
> +   (sqdmlal_laneq): Likewise.
> +   (sqdmlsl_laneq): Likewise.
> +   (sqdmlal2_lane): Likewise.
> +   (sqdmlsl2_lane): Likewise.
> +   (sqdmlal2_laneq): Likewise.
> +   (sqdmlsl2_laneq): Likewise.
> +
> * gcc.target/aarch64/vqdml_lane_intrinsics-bad_1.c: New test.
> 




[PATCH, libgomp, x86]: Optimize i386 futex_wake syscall

2015-05-28 Thread Uros Bizjak
Hello!

This patch avoids unnecessary clearing of 4th argument for futex_wake
syscall for 32bit targets.

2015-05-28  Uros Bizjak  

* config/linux/x86/futex.h (sys_futex0) [!__x86_64__]: Remove function.
(futex_wait) [!__x86_64__]: Use __asm operand instead of sys_futex0.
(futex_wake) [!__x86_64__]: Ditto.

Tested on x86_64-linux-gnu {,-m32} and committed to mainline SVN.

Uros.
Index: config/linux/x86/futex.h
===
--- config/linux/x86/futex.h(revision 223796)
+++ config/linux/x86/futex.h(working copy)
@@ -81,43 +81,49 @@
 #  define SYS_futex240
 # endif
 
-static inline long
-sys_futex0 (int *addr, int op, int val)
+static inline void
+futex_wait (int *addr, int val)
 {
   long res;
 
   __asm volatile ("int $0x80"
  : "=a" (res)
- : "0"(SYS_futex), "b" (addr), "c"(op),
-   "d"(val), "S"(0)
+ : "0" (SYS_futex), "b" (addr), "c" (gomp_futex_wait),
+   "d" (val), "S" (0)
  : "memory");
-  return res;
-}
-
-static inline void
-futex_wait (int *addr, int val)
-{
-  long res = sys_futex0 (addr, gomp_futex_wait, val);
   if (__builtin_expect (res == -ENOSYS, 0))
 {
   gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
   gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
-  sys_futex0 (addr, gomp_futex_wait, val);
+  __asm volatile ("int $0x80"
+ : "=a" (res)
+ : "0" (SYS_futex), "b" (addr), "c" (gomp_futex_wait),
+   "d" (val), "S" (0)
+ : "memory");
 }
 }
 
 static inline void
 futex_wake (int *addr, int count)
 {
-  long res = sys_futex0 (addr, gomp_futex_wake, count);
+  long res;
+
+  __asm volatile ("int $0x80"
+ : "=a" (res)
+ : "0" (SYS_futex), "b" (addr), "c" (gomp_futex_wake),
+   "d" (count)
+ : "memory");
   if (__builtin_expect (res == -ENOSYS, 0))
 {
   gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG;
   gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG;
-  sys_futex0 (addr, gomp_futex_wake, count);
+  __asm volatile ("int $0x80"
+ : "=a" (res)
+ : "0" (SYS_futex), "b" (addr), "c" (gomp_futex_wake),
+   "d" (count)
+ : "memory");
 }
 }
-
 #endif /* __x86_64__ */
 
 static inline void


[gomp4] Preserve NVPTX "reconvergence" points

2015-05-28 Thread Julian Brown
For NVPTX, it is vitally important that the divergence of threads
within a warp can be controlled: in particular we must be able to
generate code that we know "reconverges" at a particular point.
Unfortunately GCC's middle-end optimisers can cause this property to
be violated, which causes problems for the OpenACC execution model
we're planning to use for NVPTX.

As a brief example: code running in vector-single mode runs on a
single thread of a warp, and must broadcast condition results to other
threads of the warp so that they can "follow along" and be ready for
vector-partitioned execution when necessary.

#pragma acc parallel
{
  #pragma acc loop gang
  for (i = 0; i < N; i++)
  {
/* This is vector-single mode.  */
n = ...;
switch (n)
{
case 1:
  #pragma acc loop vector
  for (...)
  {
/* This is vector-partitioned mode.  */
  }
  ...
}
  }
}

Here, the calculation "n = ..." takes place on a single thread (of
each partitioned gang of the outer loop), but the switch statement
(terminating the BB) must be executed by all threads in the warp. The
vector-single statements will be translated using a branch around for
the "idle" threads:

if (threadIdx.x == 0)
{
  n_0 = ...;
}
n_x = broadcast (n_0)
switch (n_x)
...

Where "broadcast" is an operation that transfers values from some
other thread of a warp (i.e., the zeroth) to the current thread
(implemented as a "shfl" instruction for NVPTX).

I observed a similar example to this cloning the broadcast and switch
instructions (in the .dom1 dump), along the lines of:

if (threadIdx.x == 0)
{
  n_0 = ...;
  n_x = broadcast (n_0)
  switch (n_x)
  ...
}
else
{
  n_x = broadcast (n_0)
  switch (n_x)
  ...
}

This doesn't work because the "broadcast" operation has to be run with
non-diverged warps for correct operation, and here there is divergence
due to the "if (threadIdx.x == 0)" condition.

So, the way I have tried to handle this is by attempting to inhibit
optimisation along edges which have a reconvergence point as their
destination. The essential idea is to make such edges "abnormal",
although the existing EDGE_ABNORMAL flag is not used because that has
implicit meaning built into it already, and the new edge type may need
to be handled differently in some areas. One example is that at
present, blocks concluding with GIMPLE_COND cannot have EDGE_ABNORMAL
set on their EDGE_TRUE or EDGE_FALSE outgoing edges.

The attached patch introduces a new edge flag (EDGE_TO_RECONVERGENCE),
for the GIMPLE CFG only. In principle there's nothing to stop the flag
being propagated to the RTL CFG also, in which case it'd probably be
set at the same time as EDGE_ABNORMAL, mirroring the semantics of e.g.
EDGE_EH, EDGE_ABNORMAL_CALL and EDGE_SIBCALL. Then, passes which
inspect the RTL CFG can continue to only check the ABNORMAL flag. But
so far (in rather limited testing!), that has not been observed to be
necessary. (We can control RTL CFG manipulation indirectly by using the
CANNOT_COPY_INSN_P target hook, sensitive e.g. to the "broadcast"
instruction.)

For the GIMPLE CFG (i.e. in passes operating on GIMPLE form),
EDGE_TO_RECONVERGENCE behaves mostly the same as EDGE_ABNORMAL (i.e.,
inhibiting certain optimisations), and so has been added to relevant
conditionals largely mechanically. Places where it is treated specially
are:

* tree-cfg.c:gimple_verify_flow_info does not permit EDGE_ABNORMAL on
  outgoing edges of a block concluding with a GIMPLE_COND statement.
  But, we allow EDGE_TO_RECONVERGENCE there.

* tree-vrp.c:find_conditional_asserts skips over outgoing GIMPLE_COND
  edges with EDGE_TO_RECONVERGENCE set (avoiding an ICE when the pass
  tries to split the edge later).

There are probably other optimisations that will be tripped up by the
new flag along the same lines as the VRP tweak above, which we will no
doubt discover in due course.

Together with the patch,

  https://gcc.gnu.org/ml/gcc-patches/2015-05/msg02612.html

This shows no regressions for the libgomp tests.

OK for gomp4 branch?

Thanks,

Julian

ChangeLog

gcc/
* basic-block.h (EDGE_COMPLEX): Add EDGE_TO_RECONVERGENCE flag.
(bb_hash_abnorm_or_reconv_pred): New function.
(hash_abnormal_or_eh_outgoing_edge_p): Consider
EDGE_TO_RECONVERGENCE also.
* cfg-flags.def (TO_RECONVERGENCE): Add flag.
* omp-low.c (predicate_bb): Set EDGE_TO_RECONVERGENCE on edges
leading to a reconvergence point.
* cfgbuild.c (purge_dead_tablejump_edges): Consider
EDGE_TO_RECONVERGENCE.
* cfgcleanup.c (try_crossjump_to_edge, try_head_merge_bb): Likewise.
* cfgexpand.c (expand_gimple_tailcall, construct_exit_block)
(pass_expand::execute): Likewise.
* cfghooks.c (can_copy_bbs_p): Likewise.
* cfgloop.c (bb_loop_header_p): Likewise.
* cfgloopmanip.c (scale_loop_profile): Likewise.
* gimple-iterator.c (gimple_find_edge_insert_loc): Likewise.
* graph.c (draw_cfg_node_succ_edges): Likewise.
* graphite-scope-d

Re: [PATCH] hppa-linux: add missing cpp specs

2015-05-28 Thread Mike Frysinger
On 27 May 2015 14:20, John David Anglin wrote:
> On 2015-05-27 1:50 PM, Mike Frysinger wrote:
> > since i'm not looped into gcc development normally, which branches are those
> > currently ?  naively reading gcc.gnu.org homepage makes me think none since
> > they're labeled "regression fixes" and afaict, none of these are regressions.
> > they've been broken for as long as the ports have existed :/.
>
> The branches are 4.8, 4.9, 5 and trunk as noted on http://gcc.gnu.org.  
> For target fixes, that don't
> affect primary or secondary targets, nobody cares about the regression 
> criteria.

gotcha.  i've committed them then to trunk/4.8/4.9/5.  hopefully didn't break 
anything ;).

> This is probably one of the causes of poor thread behavior of many 
> applications running on
> parisc hardware.  I want to see the patch in Debian and you probably 
> want it for Gentoo.

i've already merged the patches in Gentoo for 4.6+ ;)
-mike


signature.asc
Description: Digital signature


Re: Do less generous pointer globbing in alias.c

2015-05-28 Thread Jan Hubicka
> > +alias_set_entry
> > +init_alias_set_entry (alias_set_type set)
> > +{
> > +  alias_set_entry ase = ggc_cleared_alloc ();
> 
> no need to use cleared_alloc if you also init ->is_pointer to false.
OK, will update the patch.
> 
> > +  ase->alias_set = set;
> > +  ase->children
> > += hash_map::create_ggc (64);
> 
> that seems a bit excessive, esp. for pointers which won't end
> up with any children?  So better make children lazily allocated
> in record_alias_subset.

All pointers that are not in alias set of ptr_type_node will have a child.
So there is only one childless pointer set.  I will update the code though.
> 
> I still wonder why you do this instead of changing alias_sets_conflict
> in the same way you changed alias_set_subset_of.

Because I would need two flags otherwise. One denoting alias sets that
are pointers (who needs special treatment for subset_of) and one denoting
alias set that contains pointer.

i.e. for:
struct {int *a,b;}

I need to have its alias set to contain all of setof(int), setof(int *), 
setof(void *).
I however do not want setof(struct {int *a,b;}) to be subset of setof(void *)

Honza
> 
> Patch looks ok otherwise but please leave the patch for others to
> comment on for a while.
> 
> Thanks,
> Richard.
> 
> > +   }
> > +   }
> > +}
> > +  /* In LTO the rules above needs to be part of canonical type machinery.
> > + For now just punt.  */
> > +  else if (POINTER_TYPE_P (t) && t != ptr_type_node && in_lto_p)
> >  set = get_alias_set (ptr_type_node);
> >  
> >/* Otherwise make a new alias set for this type.  */
> > @@ -953,6 +1052,15 @@ get_alias_set (tree t)
> >if (AGGREGATE_TYPE_P (t) || TREE_CODE (t) == COMPLEX_TYPE)
> >  record_component_aliases (t);
> >  
> > +  /* We treat pointer types specially in alias_set_subset_of.  */
> > +  if (POINTER_TYPE_P (t) && set)
> > +{
> > +  alias_set_entry ase = get_alias_set_entry (set);
> > +  if (!ase)
> > +   ase = init_alias_set_entry (set);
> > +  ase->is_pointer = true;
> > +}
> > +
> >return set;
> >  }
> >  
> > @@ -1003,12 +,7 @@ record_alias_subset (alias_set_type supe
> >  {
> >/* Create an entry for the SUPERSET, so that we have a place to
> >  attach the SUBSET.  */
> > -  superset_entry = ggc_cleared_alloc ();
> > -  superset_entry->alias_set = superset;
> > -  superset_entry->children
> > -   = hash_map::create_ggc (64);
> > -  superset_entry->has_zero_child = 0;
> > -  (*alias_sets)[superset] = superset_entry;
> > +  superset_entry = init_alias_set_entry (superset);
> >  }
> >  
> >if (subset == 0)
> > Index: testsuite/gcc.dg/alias-8.c
> > ===
> > --- testsuite/gcc.dg/alias-8.c  (revision 223772)
> > +++ testsuite/gcc.dg/alias-8.c  (working copy)
> > @@ -8,5 +8,5 @@ struct s {
> >  void
> >  func(struct s *ptr)
> >  {
> > -  *(void **)&ptr->p = 0; /* { dg-warning "type-punned pointer" "" { xfail 
> > *-*-* } } */
> > +  *(void **)&ptr->p = 0; /* { dg-warning "type-punned pointer" "" { } } */
> >  }
> > Index: testsuite/gcc.dg/pr62167.c
> > ===
> > --- testsuite/gcc.dg/pr62167.c  (revision 223772)
> > +++ testsuite/gcc.dg/pr62167.c  (working copy)
> > @@ -29,6 +29,8 @@ main ()
> >  
> >node.prev = (void *)head;
> >  
> > +  asm("":"=m"(node.prev));
> > +
> >head->first = &node;
> >  
> >struct node *n = head->first;
> > 
> > 
> 
> -- 
> Richard Biener 
> SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Dilip Upmanyu, Graham 
> Norton, HRB 21284 (AG Nuernberg)


Re: [patch] libstdc++/65352 fix ubsan errors in std::array

2015-05-28 Thread Jonathan Wakely

On 28/05/15 15:52 +0200, Marc Glisse wrote:

On Thu, 28 May 2015, Jonathan Wakely wrote:


Would there be a danger of an object compiled with gcc-5.1 that calls
array::data() finding the _S_ref from an object compiled with
gcc-5.2 and hitting the __builtin_unreachable in valid code?


At -O0, maybe. To be safe you would need to give this _S_ref an 
arbitrary abi_tag.


Or just rename it.


You could also replace all uses of _S_ref with *_S_ptr.


I considered this, but I thought that changing _S_ref(_M_elems, n) to
_S_ptr(_M_elems)[n] would fail to give an error for out-of-range
accesses in constant expressions e.g.

   constexpr std::array<int, 0> a{};
   constexpr int i = a[1];

But it still seems to give an error, so maybe getting rid of _S_ref
entirely is the way to go.



Re: [gomp4] Preserve NVPTX "reconvergence" points

2015-05-28 Thread Jakub Jelinek
On Thu, May 28, 2015 at 03:06:35PM +0100, Julian Brown wrote:
> For NVPTX, it is vitally important that the divergence of threads
> within a warp can be controlled: in particular we must be able to
> generate code that we know "reconverges" at a particular point.
> Unfortunately GCC's middle-end optimisers can cause this property to
> be violated, which causes problems for the OpenACC execution model
> we're planning to use for NVPTX.
> 
> As a brief example: code running in vector-single mode runs on a
> single thread of a warp, and must broadcast condition results to other
> threads of the warp so that they can "follow along" and be ready for
> vector-partitioned execution when necessary.

I think the lowering of this already at ompexp time is premature,
I think much better would be to have a function attribute (or cgraph
flag) that would be set for functions you want to compile this way
(plus a targetm flag that the targets want to support it that way),
plus a flag in loop structure for the acc loop vector loops
(perhaps the current OpenMP simd loop flags are good enough for that),
and lower it somewhere around the vectorization pass or so.

Or, what exactly do you emit for the fallback code, or for other GPGPUs
or XeonPhi?  To me e.g. for XeonPhi or HSA this sounds like you
want to implement the acc loop gang as a work-sharing loop among
threads (like #pragma omp for) and #pragma acc loop vector like
a loop that should be vectorized if at all possible (like #pragma omp simd).
I really think it is important that OpenACC GCC support is not so strongly
tied to one specific GPGPU, and similarly OpenMP should be usable for
all offloading targets GCC supports.

That way, it is possible to auto-vectorize the code too, decision how
to expand the code of offloaded function is done already separately for each
offloading target, there is a space for optimizations on much simpler
cfg, etc.

Jakub


Re: [gomp4] Preserve NVPTX "reconvergence" points

2015-05-28 Thread Richard Biener
On Thu, May 28, 2015 at 4:06 PM, Julian Brown  wrote:
> For NVPTX, it is vitally important that the divergence of threads
> within a warp can be controlled: in particular we must be able to
> generate code that we know "reconverges" at a particular point.
> Unfortunately GCC's middle-end optimisers can cause this property to
> be violated, which causes problems for the OpenACC execution model
> we're planning to use for NVPTX.
>
> As a brief example: code running in vector-single mode runs on a
> single thread of a warp, and must broadcast condition results to other
> threads of the warp so that they can "follow along" and be ready for
> vector-partitioned execution when necessary.
>
> #pragma acc parallel
> {
>   #pragma acc loop gang
>   for (i = 0; i < N; i++)
>   {
> /* This is vector-single mode.  */
> n = ...;
> switch (n)
> {
> case 1:
>   #pragma acc loop vector
>   for (...)
>   {
> /* This is vector-partitioned mode.  */
>   }
>   ...
> }
>   }
> }
>
> Here, the calculation "n = ..." takes place on a single thread (of
> each partitioned gang of the outer loop), but the switch statement
> (terminating the BB) must be executed by all threads in the warp. The
> vector-single statements will be translated using a branch around for
> the "idle" threads:
>
> if (threadIdx.x == 0)
> {
>   n_0 = ...;
> }
> n_x = broadcast (n_0)
> switch (n_x)
> ...
>
> Where "broadcast" is an operation that transfers values from some
> other thread of a warp (i.e., the zeroth) to the current thread
> (implemented as a "shfl" instruction for NVPTX).
>
> I observed a similar example to this cloning the broadcast and switch
> instructions (in the .dom1 dump), along the lines of:
>
> if (threadIdx.x == 0)
> {
>   n_0 = ...;
>   n_x = broadcast (n_0)
>   switch (n_x)
>   ...
> }
> else
> {
>   n_x = broadcast (n_0)
>   switch (n_x)
>   ...
> }
>
> This doesn't work because the "broadcast" operation has to be run with
> non-diverged warps for correct operation, and here there is divergence
> due to the "if (threadIdx.x == 0)" condition.
>
> So, the way I have tried to handle this is by attempting to inhibit
> optimisation along edges which have a reconvergence point as their
> destination. The essential idea is to make such edges "abnormal",
> although the existing EDGE_ABNORMAL flag is not used because that has
> implicit meaning built into it already, and the new edge type may need
> to be handled differently in some areas. One example is that at
> present, blocks concluding with GIMPLE_COND cannot have EDGE_ABNORMAL
> set on their EDGE_TRUE or EDGE_FALSE outgoing edges.
>
> The attached patch introduces a new edge flag (EDGE_TO_RECONVERGENCE),
> for the GIMPLE CFG only. In principle there's nothing to stop the flag
> being propagated to the RTL CFG also, in which case it'd probably be
> set at the same time as EDGE_ABNORMAL, mirroring the semantics of e.g.
> EDGE_EH, EDGE_ABNORMAL_CALL and EDGE_SIBCALL. Then, passes which
> inspect the RTL CFG can continue to only check the ABNORMAL flag. But
> so far (in rather limited testing!), that has not been observed to be
> necessary. (We can control RTL CFG manipulation indirectly by using the
> CANNOT_COPY_INSN_P target hook, sensitive e.g. to the "broadcast"
> instruction.)
>
> For the GIMPLE CFG (i.e. in passes operating on GIMPLE form),
> EDGE_TO_RECONVERGENCE behaves mostly the same as EDGE_ABNORMAL (i.e.,
> inhibiting certain optimisations), and so has been added to relevant
> conditionals largely mechanically. Places where it is treated specially
> are:
>
> * tree-cfg.c:gimple_verify_flow_info does not permit EDGE_ABNORMAL on
>   outgoing edges of a block concluding with a GIMPLE_COND statement.
>   But, we allow EDGE_TO_RECONVERGENCE there.
>
> * tree-vrp.c:find_conditional_asserts skips over outgoing GIMPLE_COND
>   edges with EDGE_TO_RECONVERGENCE set (avoiding an ICE when the pass
>   tries to split the edge later).
>
> There are probably other optimisations that will be tripped up by the
> new flag along the same lines as the VRP tweak above, which we will no
> doubt discover in due course.
>
> Together with the patch,
>
>   https://gcc.gnu.org/ml/gcc-patches/2015-05/msg02612.html
>
> This shows no regressions for the libgomp tests.
>
> OK for gomp4 branch?

Hmm, I don't think adding a new edge flag is good nor necessary.  It seems to
me that instead the broadcast operation should have abnormal control flow
and thus basic-blocks should be split either before or after it (so either
incoming or outgoing edge(s) should be abnormal).  I suppose splitting
before the broadcast would be best (thus handle it similar to setjmp ()).

Richard.

> Thanks,
>
> Julian
>
> ChangeLog
>
> gcc/
> * basic-block.h (EDGE_COMPLEX): Add EDGE_TO_RECONVERGENCE flag.
> (bb_hash_abnorm_or_reconv_pred): New function.
> (hash_abnormal_or_eh_outgoing_edge_p): Consider
> EDGE_TO_RECONVERGENCE also.
> * cfg-flags.def (TO_R

Re: [patch] libstdc++/65352 fix ubsan errors in std::array

2015-05-28 Thread Jonathan Wakely

On 28/05/15 12:53 +0100, Jonathan Wakely wrote:

Unsurprisingly ubsan doesn't like referencing a null pointer.

With this change __array_traits<T, 0>::_S_ref is only used to access an
element, which is invalid for std::array<T, 0> anyway.

Tested powerpc64le-linux, committed to trunk.


I forgot the debug and profile modes, fixed like so.

1) Why do we even have _profile::array? What's it for?

2) If we could run 'make check-sanitize' I could have added tests for
  this bug, and could have found it still failed in debug and profile
  modes. We need to be able to run the testsuite with ubsan.

I'll commit it to trunk and gcc-5-branch after testing.

commit 7a673c403d77fb2c57620f5e4f027b679bf69635
Author: Jonathan Wakely 
Date:   Thu May 28 15:35:43 2015 +0100

	PR libstdc++/65352
	* include/profile/array (array::data): Use _S_ptr.
	* include/debug/array (array::data): Likewise.

diff --git a/libstdc++-v3/include/debug/array b/libstdc++-v3/include/debug/array
index 31d146e..411e816 100644
--- a/libstdc++-v3/include/debug/array
+++ b/libstdc++-v3/include/debug/array
@@ -216,11 +216,11 @@ namespace __debug
 
   pointer
   data() noexcept
-  { return std::__addressof(_AT_Type::_S_ref(_M_elems, 0)); }
+  { return _AT_Type::_S_ptr(_M_elems); }
 
   const_pointer
   data() const noexcept
-  { return std::__addressof(_AT_Type::_S_ref(_M_elems, 0)); }
+  { return _AT_Type::_S_ptr(_M_elems); }
 };
 
   // Array comparisons.
diff --git a/libstdc++-v3/include/profile/array b/libstdc++-v3/include/profile/array
index a90e396..5198bb3 100644
--- a/libstdc++-v3/include/profile/array
+++ b/libstdc++-v3/include/profile/array
@@ -178,11 +178,11 @@ namespace __profile
 
   pointer
   data() noexcept
-  { return std::__addressof(_AT_Type::_S_ref(_M_elems, 0)); }
+  { return _AT_Type::_S_ptr(_M_elems); }
 
   const_pointer
   data() const noexcept
-  { return std::__addressof(_AT_Type::_S_ref(_M_elems, 0)); }
+  { return _AT_Type::_S_ptr(_M_elems); }
 };
 
   // Array comparisons.


Re: [gomp4] Preserve NVPTX "reconvergence" points

2015-05-28 Thread Thomas Schwinge
Hi!

On Thu, 28 May 2015 16:20:11 +0200, Jakub Jelinek  wrote:
> On Thu, May 28, 2015 at 03:06:35PM +0100, Julian Brown wrote:
> > [...]

> I think the lowering of this already at ompexp time is premature

Yes, we're aware of this "wart".  :-|

> I think much better would be to have a function attribute (or cgraph
> flag) that would be set for functions you want to compile this way
> (plus a targetm flag that the targets want to support it that way),
> plus a flag in loop structure for the acc loop vector loops
> (perhaps the current OpenMP simd loop flags are good enough for that),
> and lower it somewhere around the vectorization pass or so.

Moving the loop lowering/expansion later is along the same lines as we've
been thinking.  Figuring out how the OpenMP simd implementation works, is
another thing I wanted to look into.

> Or, what exactly do you emit for the fallback code, or for other GPGPUs
> or XeonPhi?  To me e.g. for XeonPhi or HSA this sounds like you
> want to implement the acc loop gang as a work-sharing loop among
> threads (like #pragma omp for) and #pragma acc loop vector like
> a loop that should be vectorized if at all possible (like #pragma omp simd).
> I really think it is important that OpenACC GCC support is not so strongly
> tied to one specific GPGPU

Not disagreeing, but: we have to start somewhere.  GPU offloading and all
its peculiarities is still entering unknown territory in GCC; we're still
learning, and shall try to converge the emerging different
implementations in the future.  Doing the completely generic (agnostic of
specific offloading device) implementation right now is a challenging
task, hence the work on a "nvptx-specific prototype" first, to put it
this way.

That said, we of course very much welcome your continued review of our
work, and your suggestions!

> and similarly OpenMP should be usable for
> all offloading targets GCC supports.
> 
> That way, it is possible to auto-vectorize the code too, decision how
> to expand the code of offloaded function is done already separately for each
> offloading target, there is a space for optimizations on much simpler
> cfg, etc.


Grüße,
 Thomas


pgpNo5kt_UfFt.pgp
Description: PGP signature


Re: [Patch, fortran] PR66079 - [6 Regression] memory leak with source allocation in internal subprogram

2015-05-28 Thread Mikael Morin
Le 27/05/2015 23:09, Steve Kargl a écrit :
> On Wed, May 27, 2015 at 06:24:25PM +0200, Mikael Morin wrote:
>> Le 27/05/2015 16:07, Andre Vehreschild a écrit :
>>> Hi Paul, hi Mikael,
>>>
>>> about renaming the identifier emitted: I would like to keep it short. 
>>> Remember,
>>> there is always a number attached to it, which makes it unique. Furthermore
>>> does "alloc_source_tmp" sound unnecessarily long to me. It tastes like we do
>>> not trust the unique identifier mechanism established in gfortran. But that 
>>> is
>>> just my personal taste.
>>>
>> Then let's go with "source", which seems to get the majority of the
>> votes.  It remains an improvement over "expr3" and "atmp".
>>
> 
> You do realize that expr3 holds things other than the 
> expression in a source= in an allocate, right? 
> 
I know there is mold.  I'm not aware of anything else.
Now that you tell about it, I realize that the code in that area doesn't
seem to  check for mold vs source.

Mikael


Re: [Patch, fortran] PR66079 - [6 Regression] memory leak with source allocation in internal subprogram

2015-05-28 Thread Andre Vehreschild

On Thu, 28 May 2015 16:58:44 +0200
Mikael Morin  wrote:

> Le 27/05/2015 23:09, Steve Kargl a écrit :
> > On Wed, May 27, 2015 at 06:24:25PM +0200, Mikael Morin wrote:
> >> Le 27/05/2015 16:07, Andre Vehreschild a écrit :
> >>> Hi Paul, hi Mikael,
> >>>
> >>> about renaming the identifier emitted: I would like to keep it short.
> >>> Remember, there is always a number attached to it, which makes it unique.
> >>> Furthermore does "alloc_source_tmp" sound unnecessarily long to me. It
> >>> tastes like we do not trust the unique identifier mechanism established
> >>> in gfortran. But that is just my personal taste.
> >>>
> >> Then let's go with "source", which seems to get the majority of the
> >> votes.  It remains an improvement over "expr3" and "atmp".
> >>
> > 
> > You do realize that expr3 holds things other than the 
> > expression in a source= in an allocate, right? 
> > 
> I know there is mold.  I'm not aware of anything else.
> Now that you tell about it, I realize that the code in that area doesn't
> seem to  check for mold vs source.

Which is arbitrary.

- Andre
-- 
Andre Vehreschild * Email: vehre ad gmx dot de 


Re: [gomp4] Preserve NVPTX "reconvergence" points

2015-05-28 Thread Jakub Jelinek
On Thu, May 28, 2015 at 04:49:43PM +0200, Thomas Schwinge wrote:
> > I think much better would be to have a function attribute (or cgraph
> > flag) that would be set for functions you want to compile this way
> > (plus a targetm flag that the targets want to support it that way),
> > plus a flag in loop structure for the acc loop vector loops
> > (perhaps the current OpenMP simd loop flags are good enough for that),
> > and lower it somewhere around the vectorization pass or so.
> 
> Moving the loop lowering/expansion later is along the same lines as we've
> been thinking.  Figuring out how the OpenMP simd implementation works, is
> another thing I wanted to look into.

The OpenMP simd expansion is actually quite simple thing.
Basically, the simd loop is in ompexp expanded as a normal loop with some
flags in the loop structure (which are pretty much optimization hints).
There is a flag that the user would really like to vectorize it, and another
field that says (from what user told) what vectorization factor is safe to
use regardless of compiler's analysis.  There are some complications with
privatization clauses, so some variables are in GIMPLE represented as arrays
with maximum vf elements and indexed by internal function (simd lane), which
the vectorizer then either turns into a scalar again (if the loop isn't
vectorized), or vectorizes it and for addressables keeps in arrays with
actual vf elements.

I admit I don't know too much about OpenACC, but I'd think doing something
similar (i.e. some loop structure hint or request that a particular loop is
vectorized and perhaps something about lexical forward/backward dependencies
in the loop) could work.  Then for XeonPhi or host fallback, you'd just use
normal vectorizer.  And for PTX you could, at about the same point,
instead of vectorizing, lower the code to a single working thread doing stuff,
except for simd marked loops, which would be lowered to run on all threads
in the warp.

> Not disagreeing, but: we have to start somewhere.  GPU offloading and all
> its peculiarities is still entering unknown territory in GCC; we're still
> learning, and shall try to converge the emerging different
> implementations in the future.  Doing the completely generic (agnostic of
> specific offloading device) implementation right now is a challenging
> task, hence the work on a "nvptx-specific prototype" first, to put it
> this way.

I understand it is more work, I'd just like to ask that when designing stuff
for the OpenACC offloading you (plural) try to take the other offloading
devices and host fallback into account.  E.g. the XeonPhi is not hard to
understand, it is pretty much just a many-core x86_64 chip where the
offloading is just some mechanism for running something on the other device,
and the emulation mode emulates that very well by running it in a
different process.  This stuff is already about what happens in offloaded
code, so considerations for it are similar to those for host code
(especially hosts that can vectorize).

As far as OpenMP / PTX goes, I'll try to find time for it again soon
(busy with OpenMP 4.1 work so far), but e.g. the above stuff (having
a single thread in warp do most of the non-vectorized work, and only
use other threads in the warp for vectorization) is definitely what
OpenMP will benefit from too.

Jakub


Re: [Patch]: libbacktrace - add support of PE/COFF

2015-05-28 Thread Ian Lance Taylor
On Thu, May 28, 2015 at 5:01 AM, Tristan Gingold  wrote:
>
>> On 28 May 2015, at 02:26, Ian Lance Taylor  wrote:
>
>> The #include <windows.h> will break cross-compilers.  It's not OK for
>> trunk until that is fixed.
>
> I am confused by this comment, for two reasons:
>
> - I don’t see how that would break cross-compilers.  Cross compilers
>  hosted on windows are not impacted by this include when the library is
>  used for the tools.  When the backtrace library is used for the target,
>  pecoff is not used unless the target is windows.
>  So I don’t see a case where the include breaks cross-compilers.

The way you have written the code, I'm fairly sure that it will be
compiled for an i386-coff target.


> - If the case exists, I don’t see how to implement backtrace within
>  shared libraries: I need a windows specific function to get the list
>  of DLL.

I would be OK with a #include <windows.h> that is conditional on
something that indicates that the host (from the point of view of
libbacktrace) really is Windows.


The new version of the patch is OK.

Thanks.

Ian


Re: [PATCH] Don't combine param and return value copies

2015-05-28 Thread Segher Boessenkool
On Tue, May 26, 2015 at 04:37:46PM +0930, Alan Modra wrote:
> On powerpc64le, modifying the way combine treats function parameters
> and call arguments results in some regressions.
> 
> For instance, this testcase from varasm.c
> 
> extern int foo3 (void *, ...);
> extern void foo4 (void *, const char *);
> int
> emit_tls_common (void *decl,
>const char *name,
>unsigned long size)
> {
>   foo3 (0, "\t%s\t", "..");
>   foo4 (0, name);
>   foo3 (0, ",%lu,%u\n", size, ((unsigned int *)decl)[88] / 8);
>   return 1;
> }
> 
> at -O2 produces for the prologue and first call
> 
> old   new
>   mflr 0  mflr 0
>   std 29,-24(1)   std 29,-24(1)
>   std 30,-16(1)   std 30,-16(1)
>   mr 29,4 addis 9,2,.LC0@toc@ha
>   std 31,-8(1)std 31,-8(1)
>   addis 4,2,.LC1@toc@ha   addis 10,2,.LC1@toc@ha
>   mr 31,5 addi 9,9,.LC0@toc@l
>   addis 5,2,.LC0@toc@ha   addi 10,10,.LC1@toc@l
>   mr 30,3 mr 30,3
>   addi 5,5,.LC0@toc@l mr 29,4
>   addi 4,4,.LC1@toc@l mr 31,5
>   li 3,0  mr 4,10
>   std 0,16(1) mr 5,9
>   stdu 1,-128(1)  std 0,16(1)
>   bl foo3 stdu 1,-128(1)
>   nop li 3,0
>   bl foo3
>   nop
> 
> As you can see, we have some extra register shuffling from keeping a
> pseudo for arg setup insns.  I guess the pseudos allow sched more
> freedom to mess around..

... and then RA isn't able to move things back.  I see this happening
with all three changes (return value, incoming args, outgoing args);
the changes to combine give sched1 and RA more freedom, but those then
end up generating lots of unnecessary register moves.

> On the positive side, I saw cases where keeping parameter pseudos
> allowed shrink-wrap to occur.  varasm.c:decode_reg_name_and_count is
> one of them.  More shrink-wrapping is a big win.
> 
> Here's a case where changes at the return result in poorer code
> int
> decl_readonly_section_1 (int category)
> {
>   switch (category)
> {
> case 1:
> case 2:
> case 3:
> case 4:
> case 5:
>   return 1;
> default:
>   return 0;
> }
> }
> old   new
>   addi 9,3,-6 addi 9,3,-6
>   neg 3,3 neg 3,3
>   and 3,9,3   and 3,9,3
>   rldicl 3,3,33,63srwi 3,3,31
>   blr rldicl 3,3,0,32
>   blr
> 
> Previously this:
> (insn 35 34 36 2 (set (reg:SI 161)
> (lshiftrt:SI (reg:SI 164)
> (const_int 31 [0x1f]))) {lshrsi3})
> (insn 36 35 23 2 (set (reg:DI 155 [ D.2441 ])
> (zero_extend:DI (reg:SI 161))) {zero_extendsidi2})
> (insn 23 36 24 2 (set (reg/i:DI 3 3)
> (reg:DI 155 [ D.2441 ])) {*movdi_internal64})
> 
> is first combined to
> (insn 35 34 36 2 (set (reg:SI 161)
> (lshiftrt:SI (reg:SI 164)
> (const_int 31 [0x1f]))) {lshrsi3})
> (insn 23 35 24 2 (set (reg/i:DI 3 3)
>   (and:DI (subreg:DI (reg:SI 161) 0)
>   (const_int 1 [0x1]
> which is somewhat surprising, but from my previous forays into
> combine.c I'd say happens due to known zero bits.  (Just looking at
> dumps here, rather than in gdb.)
> 
> Then the above is further combined to
> (insn 23 34 24 2 (set (reg/i:DI 3 3)
>   (zero_extract:DI (subreg:DI (reg:SI 164) 0)
>   (const_int 1 [0x1])
>   (const_int 32 [0x20])))
> 
> Looks to me like a missed optimization opportunity that insns 35 and
> 36 aren't combined without first going through the intermediate step.

The rs6000 backend doesn't have zero_extend variants of many of its
patterns, only some.  Well-known problem, long-term project.

> Anyway, here's the rewritten patch.  I've left in some knobs I used
> when testing in case you want to see for yourself what happens with
> various options.  Bootstrapped etc. powerpc64le-linux and
> x86_64-linux.

> +#define DONT_COMBINE_PARAMS 1
> +#define DONT_COMBINE_CALL_ARGS 1

I tested with all combinations of those knob settings, building Linux
kernels (mostly defconfigs); these are the resulting text sizes:

  master   alan00   alan10   alan01   alan11
 5432728  5432728  5433848  5435472  5436080  alpha
 3851131  3851391  3852495  3852567  3853755  arm
 2190716  2190716  2190716  2190708  2190708  blackfin
 2191439  2191503  2191983  2192335  2192751  c6x
 2213186  2213250  2213154  2213482  2213546  cris
 3322420  3322420  3322500  3322564  3322692  frv
10898664 10898664 10898664 10898664 10898664  i386
 3253459  3253539  3253599  3255235  3255331  m32r
 4708528  4708532  4709772  4708660  4709656  microblaze
 3949689  3949745  3950269 

[Ada] Fix ICEs with function returning variable-sized type

2015-05-28 Thread Eric Botcazou
This fixes various cases of a common pattern that would result in an ICE in 
the gimplifier because it is trying to create a temporary of variable size.

Tested on x86_64-suse-linux, applied on the mainline.


2015-05-28  Eric Botcazou  

* gcc-interface/gigi.h (gnat_stabilize_reference): Adjust.
(rewrite_fn): Remove third parameter.
(type_is_padding_self_referential): New inline predicate.
(return_type_with_variable_size_p): Likewise.
* gcc-interface/decl.c (allocatable_size_p): Move around.
(cannot_be_superflat_p): Rename into...
(cannot_be_superflat ): ...this.
(initial_value_needs_conversion): New predicate.
(gnat_to_gnu_entity): Invoke type_is_padding_self_referential,
initial_value_needs_conversion and adjust to above renaming.
For a renaming, force the materialization if the inner expression
is compound.  Adjust calls to elaborate_reference and build a
compound expression if needed.
(struct er_dat): Add N field.
(elaborate_reference_1): Remove N parameter and adjust.
(elaborate_reference): Add INIT parameter and pass it in the call to
gnat_rewrite_reference.  Adjust initial expression.
* gcc-interface/trans.c (Call_to_gnu): Treat renamings the same way as
regular object declarations when it comes to creating a temporary.
Adjust call to gnat_stabilize_reference and build a compound expression
if needed.  Invoke return_type_with_variable_size_p.
(gnat_to_gnu): Invoke type_is_padding_self_referential.  In case #4,
return a call to a function unmodified if it returns with variable size
and is also the initial expression in an object declaration.
* gcc-interface/utils2.c (build_binary_op) : Use the RHS'
type if it is a call to a function that returns with variable size.
(build_unary_op): Invoke type_is_padding_self_referential.
(gnat_stabilize_reference_1): Remove N parameter and adjust.
(gnat_stabilize_reference): Add INIT parameter and pass it in the call
to gnat_rewrite_reference.
(gnat_rewrite_reference):  Remove N, add INIT parameter and adjust.
: New case.


2015-05-28  Eric Botcazou  

* gnat.dg/varsize_temp.adb: Rename into...
* gnat.dg/varsize1.adb: ...this.
* gnat.dg/varsize_copy.ad[sb]: Rename into...
* gnat.dg/varsize2.ad[sb]: ...this.
* gnat.dg/varsize3_1.adb: New test.
* gnat.dg/varsize3_2.adb: Likewise.
* gnat.dg/varsize3_3.adb: Likewise.
* gnat.dg/varsize3_4.adb: Likewise.
* gnat.dg/varsize3_5.adb: Likewise.
* gnat.dg/varsize3_6.adb: Likewise.
* gnat.dg/varsize3_pkg1.ads: New helper.
* gnat.dg/varsize3_pkg2.ads: Likewise.
* gnat.dg/varsize3_pkg3.ads: Likewise.


-- 
Eric Botcazou
Index: gcc-interface/decl.c
===
--- gcc-interface/decl.c	(revision 223768)
+++ gcc-interface/decl.c	(working copy)
@@ -168,7 +168,6 @@ struct value_annotation_hasher : ggc_cac
 
 static GTY ((cache)) hash_table *annotate_value_cache;
 
-static bool allocatable_size_p (tree, bool);
 static void prepend_one_attribute (struct attrib **,
    enum attr_type, tree, tree, Node_Id);
 static void prepend_one_attribute_pragma (struct attrib **, Node_Id);
@@ -179,7 +178,7 @@ static bool type_has_variable_size (tree
 static tree elaborate_expression_1 (tree, Entity_Id, const char *, bool, bool);
 static tree elaborate_expression_2 (tree, Entity_Id, const char *, bool, bool,
 unsigned int);
-static tree elaborate_reference (tree, Entity_Id, bool);
+static tree elaborate_reference (tree, Entity_Id, bool, tree *);
 static tree gnat_to_gnu_component_type (Entity_Id, bool, bool);
 static tree gnat_to_gnu_param (Entity_Id, Mechanism_Type, Entity_Id, bool,
 			   bool *);
@@ -189,8 +188,10 @@ static tree change_qualified_type (tree,
 static bool same_discriminant_p (Entity_Id, Entity_Id);
 static bool array_type_has_nonaliased_component (tree, Entity_Id);
 static bool compile_time_known_address_p (Node_Id);
-static bool cannot_be_superflat_p (Node_Id);
+static bool cannot_be_superflat (Node_Id);
 static bool constructor_address_p (tree);
+static bool allocatable_size_p (tree, bool);
+static bool initial_value_needs_conversion (tree, tree);
 static int compare_field_bitpos (const PTR, const PTR);
 static bool components_to_record (tree, Node_Id, tree, int, bool, bool, bool,
   bool, bool, bool, bool, bool, tree, tree *);
@@ -957,8 +958,8 @@ gnat_to_gnu_entity (Entity_Id gnat_entit
 	   to make it more likely to rename the underlying object.  */
 	if (Present (Renamed_Object (gnat_entity)))
 	  {
-	/* If the renamed object had padding, strip off the reference
-	   to the inner object and reset our type.  */
+	/* If the renamed object had padding, strip off the reference to
+	   the i

[gomp4, committed, PR65419] Add IFN_GOACC_DATA_END_WITH_ARG

2015-05-28 Thread Tom de Vries

Hi,

this patch fixes PR65419.

Consider this test-case:
...
void
f (void)
{
  int i;

#pragma acc data copyout (i)
  {

  }
}
...

When compiling the oacc data region, the start and end are marked with 
GOACC_data_start and GOACC_data_end:

...
  .omp_data_arr.1.i = &i;
  GOACC_data_start (-1, 1, &.omp_data_arr.1, &.omp_data_sizes.2,
&.omp_data_kinds.3);
  GOACC_data_end ();
  .omp_data_arr.1 = {CLOBBER};
...

We're marking the &.omp_data_arr.1 argument of GOACC_data_start with 
fnspec 'r', meaning NOESCAPE and NOCLOBBER, which has the effect that 
the call to GOACC_data_end is optimized to a tail call.


But actually, during GOACC_data_end we write i's accelerator value back 
to i, which due to the tail call optimization is no longer allocated. 
This causes a runtime error.


So actually, the fact that we write i's accelerator value back to i 
during GOACC_data_end, means i and .omp_data_arr escape during 
GOACC_data_start.


The easy way to fix this is to remove the 'r' in the fnspec for the 
GOACC_data_start &.omp_data_arr. argument. But that would mean that 
GOACC_data_start would become an optimization barrier, which would mean 
missed optimizations in the kernels region.



This patch fixes the problem by adding the &.omp_data_arr argument to 
the new internal function IFN_GOACC_DATA_END_WITH_ARG:

...
  .omp_data_arr.1.i = &i;
  GOACC_data_start (-1, 1, &.omp_data_arr.1, &.omp_data_sizes.2,
&.omp_data_kinds.3);
  GOACC_DATA_END_WITH_ARG (&.omp_data_arr.1);
  .omp_data_arr.1 = {CLOBBER};
...
This allows us to pretend that .omp_data_arr does not escape in 
GOACC_data_start.


The internal function call is replaced by a GOACC_data_end call before 
expand, dropping the argument so as not to break the ABI:

...
  .omp_data_arr.1.i = &i;
  GOACC_data_start (-1, 1, &.omp_data_arr.1, &.omp_data_sizes.2,
&.omp_data_kinds.3);
  GOACC_data_end ();
  .omp_data_arr.1 ={v} {CLOBBER};
...


Bootstrapped and regtested on gomp-4_0-branch, committed to gomp-4_0-branch.

Thanks,
- Tom
Add IFN_GOACC_DATA_END_WITH_ARG

2015-05-28  Tom de Vries  

	PR tree-optimization/65419
	* cfgexpand.c (pass_data_expand): Add PROP_gimple_lompifn to
	properties_required field.
	* gimplify.c (gimplify_omp_workshare): Use IFN_GOACC_DATA_END_WITH_ARG
	instead of BUILT_IN_GOACC_DATA_END.  Clear PROP_gimple_lompifn in
	curr_properties.
	(gimplify_function_tree): Tentatively set PROP_gimple_lompifn in
	curr_properties.
	* internal-fn.c (expand_GOACC_DATA_END_WITH_ARG): New dummy function.
	* internal-fn.def (GOACC_DATA_END_WITH_ARG): New DEF_INTERNAL_FN.
	* omp-low.c (lower_omp_target): Set argument of GOACC_DATA_END_WITH_ARG.
	(pass_data_late_lower_omp): New pass_data.
	(pass_late_lower_omp): New pass.
	(pass_late_lower_omp::gate, pass_late_lower_omp::execute)
	(make_pass_late_lower_omp): New function.
	* passes.def: Add pass_late_lower_omp.
	* tree-inline.c (expand_call_inline): Handle PROP_gimple_lompifn.
	* tree-pass.h (PROP_gimple_lompifn): Add define.

	* testsuite/libgomp.oacc-c-c++-common/goacc-data-end.c: New test.
---
 gcc/cfgexpand.c|  3 +-
 gcc/gimplify.c | 25 +--
 gcc/internal-fn.c  |  9 +++
 gcc/internal-fn.def|  1 +
 gcc/omp-low.c  | 86 +-
 gcc/passes.def |  1 +
 gcc/tree-inline.c  | 16 ++--
 gcc/tree-pass.h|  2 +
 .../libgomp.oacc-c-c++-common/goacc-data-end.c | 68 +
 9 files changed, 197 insertions(+), 14 deletions(-)
 create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/goacc-data-end.c

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 5905ddb..6941e3e 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -5900,7 +5900,8 @@ const pass_data pass_data_expand =
   ( PROP_ssa | PROP_gimple_leh | PROP_cfg
 | PROP_gimple_lcx
 | PROP_gimple_lvec
-| PROP_gimple_lva), /* properties_required */
+| PROP_gimple_lva
+| PROP_gimple_lompifn), /* properties_required */
   PROP_rtl, /* properties_provided */
   ( PROP_ssa | PROP_trees ), /* properties_destroyed */
   0, /* todo_flags_start */
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 912b60f..c85b424 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -7640,20 +7640,32 @@ gimplify_omp_workshare (tree *expr_p, gimple_seq *pre_p)
 	pop_gimplify_context (NULL);
   if (ort == ORT_TARGET_DATA)
 	{
-	  enum built_in_function end_ix;
 	  switch (TREE_CODE (expr))
 	{
 	case OACC_DATA:
-	  end_ix = BUILT_IN_GOACC_DATA_END;
+	  /* Rather than building a call to BUILT_IN_GOACC_DATA_END, we use
+		 this ifn which is similar, but has a pointer argument, which
+		 will be later set to the &.omp_data_arr of the corresponding
+		 BUILT_IN_GOACC_DATA_START.
+		 This allows u

Re: [Patch, fortran, PR44672, v6] [F08] ALLOCATE with SOURCE and no array-spec

2015-05-28 Thread Andre Vehreschild
Hi Mikael,

thanks for the comments so far.

> I don't understand why one of your previous patches was factoring the
> source expression evaluation to a temporary in gfc_trans_allocate, and
> now with this patch you do the same thing in gfc_resolve_allocate, not
> reusing the part in gfc_trans_allocate.

If I remember correctly, at the time of writing this patch the one
factoring out the temporary in gfc_trans_allocate() was not doing that yet. At
least it was not always doing it as needed. Therefore we are already looking
at a kind of history here.

> 
> > *** failure:
> > *** 7201,7212 
> > --- 7212,7229 
> > return false;
> >   }
> >   
> > + 
> >   static void
> >   resolve_allocate_deallocate (gfc_code *code, const char *fcn)
> >   {
> > gfc_expr *stat, *errmsg, *pe, *qe;
> > gfc_alloc *a, *p, *q;
> >   
> > +   /* When this flag is set already, then this allocate has already been
> > +  resolved.  Doing so again, would result in an endless loop.  */
> > +   if (code->ext.alloc.arr_spec_from_expr3)
> > + return;
> > + 
> I expect you'll miss some error messages by doing this.
> Where is the endless loop?

This has been removed. The endless loop was triggered by gfc_resolve_code () in
line 179 of the patch, which is now in a chunk that is mostly removed.

> > *** resolve_allocate_deallocate (gfc_code *c
> > *** 7375,7382 
> > --- 7392,7500 
> >   
> > if (strcmp (fcn, "ALLOCATE") == 0)
> >   {
> > +   bool arr_alloc_wo_spec = false;
> > for (a = code->ext.alloc.list; a; a = a->next)
> > !   resolve_allocate_expr (a->expr, code, &arr_alloc_wo_spec);
> > ! 
> > !   if (arr_alloc_wo_spec && code->expr3)
> > !   {
>   [...]
> > ! 
> > ! ass = gfc_get_code (EXEC_ASSIGN);
> This memory is not freed as far as I know.
> I think you can use a local variable for it.

Complete block removed. Therefore fixed.

> *** /tmp/PRaWHc_trans-expr.c  2015-05-25 19:54:35.056309429 +0200
> --- /tmp/7e82nd_trans-expr.c  2015-05-25 19:54:35.058309429 +0200
> *** gfc_conv_procedure_call (gfc_se * se, gf
> *** 5328,5334 
> if (e && (e->ts.type == BT_DERIVED || e->ts.type == BT_CLASS)
>   && e->ts.u.derived->attr.alloc_comp
>   && !(e->symtree && e->symtree->n.sym->attr.pointer)
> ! && (e->expr_type != EXPR_VARIABLE && !e->rank))
>   {
> int parm_rank;
> tmp = build_fold_indirect_ref_loc (input_location,
> --- 5328,5335 
> if (e && (e->ts.type == BT_DERIVED || e->ts.type == BT_CLASS)
>   && e->ts.u.derived->attr.alloc_comp
>   && !(e->symtree && e->symtree->n.sym->attr.pointer)
> ! && e->expr_type != EXPR_VARIABLE && !e->rank
> ! && e->expr_type != EXPR_STRUCTURE)
>   {
> int parm_rank;
> tmp = build_fold_indirect_ref_loc (input_location,
> 
> Can't you remove this? It's undone by the PR58586 patch.

Removed, looks like an artefact of a long forgotten need.

> > *** gfc_trans_allocate (gfc_code * code)
> > *** 5733,5746 
> >   
> >   if (dataref && dataref->u.c.component->as)
> > {
> > ! int dim;
> >   gfc_expr *temp;
> >   gfc_ref *ref = dataref->next;
> >   ref->u.ar.type = AR_SECTION;
> >   /* We have to set up the array reference to give ranges
> >  in all dimensions and ensure that the end and stride
> >  are set so that the copy can be scalarized.  */
> > - dim = 0;
> >   for (; dim < dataref->u.c.component->as->rank; dim++)
> > {
> >   ref->u.ar.dimen_type[dim] = DIMEN_RANGE;
> > --- 5758,5815 
> >   
> >   if (dataref && dataref->u.c.component->as)
> > {
> > ! int dim = 0;
> >   gfc_expr *temp;
> >   gfc_ref *ref = dataref->next;
> >   ref->u.ar.type = AR_SECTION;
> > + if (code->ext.alloc.arr_spec_from_expr3)
> > +   {
> > + /* Take the array dimensions from the
> > +source=-expression.  */
> > + gfc_array_ref *source_ref =
> > + gfc_find_array_ref (code->expr3);
> Does this work?  code->expr3 is not always a variable.

The block removed from resolve_allocate() ensured, that this was always a
variable. Therefore, yes, it had to work then. Now, we of course have far more
trouble.

> 
> > + if (source_ref->type == AR_FULL)
> > +   {
> > + /* For full array refs copy the bounds.  */
> > + for (; dim < dataref->u.c.component->as->rank;
> > dim++)
> > +   {
> > + ref->u.ar.dimen_type[dim] = DIMEN_RANGE;
> > + ref->u.ar.start[dim] =
> > + gfc_copy_expr
> > (source_ref->as->lower[

[Ada] Tidy up gnat_pushdecl

2015-05-28 Thread Eric Botcazou
And more precisely the block of code at the end responsible for setting the 
name of types and creating associated typedefs if necessary.

Tested on x86_64-suse-linux, applied on the mainline.


2015-05-28  Eric Botcazou  

* gcc-interface/utils.c (gnat_pushdecl): Reunify the handling of array
and pointer types wrt DECL_ORIGINAL_TYPE and adjust left and right.


-- 
Eric Botcazou
Index: gcc-interface/utils.c
===
--- gcc-interface/utils.c	(revision 223831)
+++ gcc-interface/utils.c	(working copy)
@@ -776,31 +776,21 @@ gnat_pushdecl (tree decl, Node_Id gnat_n
 {
   tree t = TREE_TYPE (decl);
 
+  /* Array and pointer types aren't tagged types in the C sense so we need
+	 to generate a typedef in DWARF for them and make sure it is preserved,
+	 unless the type is artificial.  */
   if (!(TYPE_NAME (t) && TREE_CODE (TYPE_NAME (t)) == TYPE_DECL)
-	  && (TREE_CODE (t) != POINTER_TYPE || DECL_ARTIFICIAL (decl)))
-	{
-	  /* Array types aren't "tagged" types so we force the type to be
-	 associated with its typedef in the DWARF back-end, in order to
-	 make sure that the latter is always preserved, by creating an
-	 on-side copy for DECL_ORIGINAL_TYPE.  We used to do the same
-	 for pointer types, but to have consistent DWARF output we now
-	 create a copy for the type itself and use the original type
-	 for DECL_ORIGINAL_TYPE like the C front-end.  */
-	  if (!DECL_ARTIFICIAL (decl) && TREE_CODE (t) == ARRAY_TYPE)
-	{
-	  tree tt = build_distinct_type_copy (t);
-	  /* Array types need to have a name so that they can be related
-		 to their GNAT encodings.  */
-	  TYPE_NAME (tt) = DECL_NAME (decl);
-	  defer_or_set_type_context (tt,
-	 DECL_CONTEXT (decl),
-	 deferred_decl_context);
-	  TYPE_STUB_DECL (tt) = TYPE_STUB_DECL (t);
-	  DECL_ORIGINAL_TYPE (decl) = tt;
-	}
-	}
+	  && ((TREE_CODE (t) != ARRAY_TYPE && TREE_CODE (t) != POINTER_TYPE)
+	  || DECL_ARTIFICIAL (decl)))
+	;
+  /* For array and pointer types, create the DECL_ORIGINAL_TYPE that will
+	 generate the typedef in DWARF.  Also do that for fat pointer types
+	 because, even though they are tagged types in the C sense, they are
+	 still XUP types attached to the base array type at this point.  */
   else if (!DECL_ARTIFICIAL (decl)
-	   && (TREE_CODE (t) == POINTER_TYPE || TYPE_IS_FAT_POINTER_P (t)))
+	   && (TREE_CODE (t) == ARRAY_TYPE
+		   || TREE_CODE (t) == POINTER_TYPE
+		   || TYPE_IS_FAT_POINTER_P (t)))
 	{
 	  tree tt;
 	  /* ??? Copy and original type are not supposed to be variant but we
@@ -811,7 +801,8 @@ gnat_pushdecl (tree decl, Node_Id gnat_n
 	{
 	  /* TYPE_NEXT_PTR_TO is a chain of main variants.  */
 	  tt = build_distinct_type_copy (TYPE_MAIN_VARIANT (t));
-	  TYPE_NEXT_PTR_TO (TYPE_MAIN_VARIANT (t)) = tt;
+	  if (TREE_CODE (t) == POINTER_TYPE)
+		TYPE_NEXT_PTR_TO (TYPE_MAIN_VARIANT (t)) = tt;
 	  tt = build_qualified_type (tt, TYPE_QUALS (t));
 	}
 	  TYPE_NAME (tt) = decl;
@@ -820,29 +811,36 @@ gnat_pushdecl (tree decl, Node_Id gnat_n
  deferred_decl_context);
 	  TREE_USED (tt) = TREE_USED (t);
 	  TREE_TYPE (decl) = tt;
-	  if (TYPE_NAME (t) != NULL_TREE
+	  if (TYPE_NAME (t)
 	  && TREE_CODE (TYPE_NAME (t)) == TYPE_DECL
 	  && DECL_ORIGINAL_TYPE (TYPE_NAME (t)))
 	DECL_ORIGINAL_TYPE (decl) = DECL_ORIGINAL_TYPE (TYPE_NAME (t));
 	  else
 	DECL_ORIGINAL_TYPE (decl) = t;
+	  /* Array types need to have a name so that they can be related to
+	 their GNAT encodings.  */
+	  if (TREE_CODE (t) == ARRAY_TYPE && !TYPE_NAME (t))
+	TYPE_NAME (t) = DECL_NAME (decl);
 	  t = NULL_TREE;
 	}
-  else if (TYPE_NAME (t) != NULL_TREE
+  else if (TYPE_NAME (t)
 	   && TREE_CODE (TYPE_NAME (t)) == TYPE_DECL
 	   && DECL_ARTIFICIAL (TYPE_NAME (t)) && !DECL_ARTIFICIAL (decl))
 	;
   else
 	t = NULL_TREE;
 
-  /* Propagate the name to all the anonymous variants.  This is needed
-	 for the type qualifiers machinery to work properly (see
-	 check_qualified_type).  Also propagate the context to them.  Note that
-	 the context will be propagated to all parallel types too thanks to
-	 gnat_set_type_context.  */
+  /* Propagate the name to all the variants, this is needed for the type
+	 qualifiers machinery to work properly (see check_qualified_type).
+	 Also propagate the context to them.  Note that it will be propagated
+	 to all parallel types too thanks to gnat_set_type_context.  */
   if (t)
 	for (t = TYPE_MAIN_VARIANT (t); t; t = TYPE_NEXT_VARIANT (t))
-	  if (!(TYPE_NAME (t) && TREE_CODE (TYPE_NAME (t)) == TYPE_DECL))
+	  /* ??? Because of the previous kludge, we can have variants of fat
+	 pointer types with different names.  */
+	  if (!(TYPE_IS_FAT_POINTER_P (t)
+		&& TYPE_NAME (t)
+		&& TREE_CODE (TYPE_NAME (t)) == TYPE_DECL))
 	{
 	  TYPE_NAME (t) = decl;
 	  defer

[Ada] Fix bogus Constraint_Error raised for Max_Size_In_Storage_Elements

2015-05-28 Thread Eric Botcazou
We raise bogus Constraint_Error for the Max_Size_In_Storage_Elements attribute 
applied to unconstrained array types and types derived from them.  This very 
likely was introduced when sizetype was changed to unsigned.

Tested on x86_64-suse-linux, applied on the mainline.


2015-05-28  Eric Botcazou  

* gcc-interface/utils.c (max_size) : Add special code to
deal with the subtraction of a "negative" value in an unsigned type.


2015-05-28  Eric Botcazou  

* gnat.dg/discr43.adb: New test.


-- 
Eric Botcazou
Index: gcc-interface/utils.c
===
--- gcc-interface/utils.c	(revision 223835)
+++ gcc-interface/utils.c	(working copy)
@@ -3443,9 +3443,23 @@ max_size (tree exp, bool max_p)
 	if ((code == MINUS_EXPR || code == PLUS_EXPR)
 	&& TREE_CODE (lhs) == INTEGER_CST
 	&& TREE_OVERFLOW (lhs)
-	&& !TREE_CONSTANT (rhs))
+	&& TREE_CODE (rhs) != INTEGER_CST)
 	  return lhs;
 
+	/* If we are going to subtract a "negative" value in an unsigned type,
+	   do the operation as an addition of the negated value, in order to
+	   avoid creating a spurious overflow below.  */
+	if (code == MINUS_EXPR
+	&& TYPE_UNSIGNED (type)
+	&& TREE_CODE (rhs) == INTEGER_CST
+	&& !TREE_OVERFLOW (rhs)
+	&& tree_int_cst_sign_bit (rhs) != 0)
+	  {
+	rhs = fold_build1 (NEGATE_EXPR, type, rhs);
+	code = PLUS_EXPR;
+	  }
+
+	/* We need to detect overflows so we call size_binop here.  */
 	return size_binop (code, lhs, rhs);
   }
 
-- { dg-do compile }

with Text_IO; use Text_IO;

procedure Discr43 is

  type Arr is array (Short_Integer range <>) of Boolean;

  type Rec (LB : Short_Integer; UB : Short_Integer) is record
A : Arr (LB .. UB);
  end record;

begin
  Put_Line ("Arr'Max_Size =" & Arr'Max_Size_In_Storage_Elements'Img);
  Put_Line ("Rec'Max_Size =" & Rec'Max_Size_In_Storage_Elements'Img);
end;


Re: [PATCH v3] libiberty: cleanup Makefile.in

2015-05-28 Thread Ian Lance Taylor
On Thu, May 28, 2015 at 1:57 AM, Bernhard Reutner-Fischer
 wrote:
>
> +stamp-pic-ofiles: $(CFILES:%=$(srcdir)/%)

To the best of my knowledge, in POSIX make variable substitutions of
this form do not recognize % specially.  In POSIX make this kind of
substitution can only be used to change the file extension.  Also,
POSIX make does not permit a variable expansion in the right hand side
of the substitution.

Ian


Re: [Ada] Avoid use of secondary stack

2015-05-28 Thread Eric Botcazou
> This patch avoids the use of the secondary stack, and the corresponding
> cleanup handlers, in many cases. For example, access discriminants no
> longer force functions to return on the secondary stack. This is a speed
> improvement. It is particularly relevant to the Ada.Containers.

It also uncovered a buglet in gigi, fixed thusly, applied on the mainline.


2015-05-28  Eric Botcazou  

* gcc-interface/decl.c (gnat_to_gnu_entity) : Do
not error out on a return type which has a size that overflows if the
return is done by invisible reference.


-- 
Eric Botcazou
Index: gcc-interface/decl.c
===
--- gcc-interface/decl.c	(revision 223834)
+++ gcc-interface/decl.c	(working copy)
@@ -4224,7 +4224,8 @@ gnat_to_gnu_entity (Entity_Id gnat_entit
 	/* If the return type has a size that overflows, we cannot have
 	   a function that returns that type.  This usage doesn't make
 	   sense anyway, so give an error here.  */
-	if (TYPE_SIZE_UNIT (gnu_return_type)
+	if (!return_by_invisi_ref_p
+		&& TYPE_SIZE_UNIT (gnu_return_type)
 		&& TREE_CODE (TYPE_SIZE_UNIT (gnu_return_type)) == INTEGER_CST
 		&& !valid_constant_size_p (TYPE_SIZE_UNIT (gnu_return_type)))
 	  {


Re: [PATCH] Simple fix to enhance outer-loop vectorization.

2015-05-28 Thread Yuri Rumyantsev
Richard,

First of all, I don't think that it is possible to write a test for
outer-loop vectorization with a zero-step reference, because of possible
loop-carried dependencies, and run-time aliasing is not supported for
outer loops. If there are no such dependencies, pre or pdse does the
hoisting (sinking) of such invariant references. So I added a check
to accept zero-step references for outer loops marked with the
force-vectorize flag, to guarantee the absence of loop-carried dependencies
between inner-loop iterations.
I included a run-time test that checks vectorization correctness.

Updated patch is attached.
Yuri..

2015-05-28 14:39 GMT+03:00 Richard Biener :
> On Thu, May 28, 2015 at 1:00 PM, Yuri Rumyantsev  wrote:
>> Hi All,
>>
>> Here is a simple patch which removes restriction on outer-loop
>> vectorization -  allow references in inner-loop with zero step. This
>> case was found in one important benchmark.
>>
>> Bootstrap and regression testing did not show any new failures.
>> Is it OK for trunk?
>>
>> ChangeLog:
>> 2015-05-28  Yuri Rumyantsev  
>>
>> * tree-vect-data-refs.c (vect_analyze_data_ref_access): Allow
>> consecutive accesses within outer-loop vectorization for references
>> with zero step in inner-loop.
>>
>> gcc/testsuite/ChangeLog:
>> * gcc.dg/vect/fast-math-vect-outer-1.c: New test.
>
> Can you please add a non-omp-simd testcase that triggers this as well and that
> is a runtime testcase verifying the transform is correct?
>
> Also please don't add to the strange testcase-name machinery but just
> use { dg-additional-options "-ffast-math" }
>
> Index: tree-vect-data-refs.c
> ===
> --- tree-vect-data-refs.c   (revision 223653)
> +++ tree-vect-data-refs.c   (working copy)
> @@ -2261,7 +2261,6 @@
>return true;
>  }
>
> -
>  /* Analyze the access pattern of the data-reference DR.
> In case of non-consecutive accesses call vect_analyze_group_access() to
> analyze groups of accesses.  */
>
> spurious white-space change
>
>
> @@ -2291,14 +2290,8 @@
>if (loop_vinfo && integer_zerop (step))
>
> Surely the comment before this needs updating now.
>
>  {
>GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
> -  if (nested_in_vect_loop_p (loop, stmt))
> -   {
> - if (dump_enabled_p ())
> -   dump_printf_loc (MSG_NOTE, vect_location,
> -"zero step in inner loop of nest\n");
> - return false;
> -   }
> -  return DR_IS_READ (dr);
> +  if (!nested_in_vect_loop_p (loop, stmt))
> +   return DR_IS_READ (dr);
>  }
>
>if (loop && nested_in_vect_loop_p (loop, stmt))
>
> so what happens after the patch?  It would be nice to have a comment
> explaining what happens in the nested_in_vect_loop_p case for
> the case when the outer-loop step is zero and when it is not zero.
>
> In particular as you don't need any code generation changes - this hints
> at that you may miss something ;)
>
> Otherwise of course the patch is ok - lifting restrictions is good.
>
> Thanks,
> Richard.


patch.1
Description: Binary data


[PATCH][ARM/AArch64 Testsuite] Cleanup advsimd-intrinsics.exp, removing unnecessary loop

2015-05-28 Thread Alan Lawrence
I've tested this on aarch64, aarch64_be, and arm, and in all cases, the same 
tests are executed (whether running the whole advsimd-intrinsics.exp, or 
manually specifying a single file). AFAICT the loop, explicit runtest_file_p, 
and gcc_parallel_test_enable, all stem from a point where we were calling 
c-torture-execute instead of or as well as gcc-dg-runtest. Now we have only the 
latter, it is quite capable of looping through tests itself (and correctly 
parallelizing them).


Ok for trunk?

Cheers, Alan


diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrins
index 583832a..19a982d 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp
@@ -55,20 +55,8 @@ set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTI
 set additional_flags [add_options_for_arm_neon ""]

 # Main loop.
-foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.c]] {
-# If we're only testing specific files and this isn't one of them, skip it.
-if ![runtest_file_p $runtests $src] then {
-   continue
-}
-
-# runtest_file_p is already run above, and the code below can run
-# runtest_file_p again, make sure everything for this test is
-# performed if the above runtest_file_p decided this runtest
-# instance should execute the test
-gcc_parallel_test_enable 0
-gcc-dg-runtest $src "" $additional_flags
-gcc_parallel_test_enable 1
-}
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \
+  "" ${additional_flags}

 # All done.
 set dg-do-what-default ${save-dg-do-what-default}



Re: Statically-allocated objects with non-trivial ctors (was Re: [PATCH 33/35] Change use to type-based pool allocator in ira-color.c.)

2015-05-28 Thread Jeff Law

On 05/28/2015 04:42 AM, David Malcolm wrote:


Am I right in thinking that this is a statically-allocated object with a
non-trivial constructor?  i.e. that this constructor has to run before
"main" is entered?

Do our coding guidelines allow for this?  (I've been burned by this
before, on a buggy C++ runtime that didn't manage to support these).
I'm a little nervous about this, touching global state before
"main" (e.g. from the point-of-view of the JIT), though I don't know yet
if this is just a gut reaction, or if there's a valid concern here (I'm
officially on holiday this week, so I haven't had a chance to dig deeply
into these patches yet, sorry).
That idiom is used in various places by Martin's patches.   I didn't see 
a strong rhyme or reason behind why it was used over allocating 
something in automatic or heap storage.


As to supporting it, I'm not terribly concerned about other buggy C++ 
runtimes.  GCC bootstraps with GCC, which means we've got our C++ 
runtime.  The only worry becomes the low level bits that we build our 
static ctor/dtor support on top of -- and I haven't seen major problems 
with that for eons.


jeff


Re: [PATCH, RFC] New memory usage statistics infrastructure

2015-05-28 Thread Jeff Law

On 05/28/2015 06:29 AM, Martin Liška wrote:





Hello.

Thank you for pointing out the missing copyright.
Following patch adds that.

Ready for trunk?

Yes.
jeff



Re: [PATCH 01/35] Introduce new type-based pool allocator.

2015-05-28 Thread Jeff Law

On 05/28/2015 06:49 AM, Martin Liška wrote:
.


This mechanism has just been adapted. I find it quite useful as we have
examples in the source code where we
allocate the same struct/class types from various pools. For debugging
purposes, it helps to identify whether
the release operation is called for the correct pool.
I saw that you were following existing practice for the pools in the 
removal patch. I still don't like it as it makes mixing and matching 
objects harder when debugging gcc and if the structure is exposed for 
plugins, then we've got an unnecessary ABI plugin breakage.


I certainly understand how it's useful -- I'm not questioning that.  I'm 
questioning changing the size of structures on ENABLE_CHECKING.


My first inclination would be to include all that stuff unconditionally. 
 If that's too much overhead, then perhaps include the structure 
member, but not bother with any of the bookkeeping except for 
ENABLE_CHECKING.




Anyway, I would like to commit all these patches at once (one by one).
Thus, I'm going to wait for approval for the whole series before I'll
commit the set.
Quite reasonable -- I was mostly trying to make sure I understood the 
testing situation.


I think at this point the whole series is approved, so you can move forward.

jeff



[PATCH] Extend -fno-plt to normal non-PIC branches on x86

2015-05-28 Thread H.J. Lu
This patch extends -fno-plt to normal non-PIC calls on x86.  -fno-plt
works in 64-bit mode with the existing binutils.  For 32-bit, we need
the updated assembler and linker to support "call/jmp *foo@GOT" with
a new relocation different from R_386_GOT32 to indicate that this
relocation applies to indirect branches.  A configure time check is
added to verify that 32-bit assembler generates a known relocation
which is different from R_386_GOT32.  A new 32-bit relocation is needed
since "call/jmp *foo@GOT" requires a different relocation from R_386_GOT32
which is used together with a GOT register in "call/jmp *foo@GOT(%reg)".

OK for master?

Thanks.

H.J.
---
* configure.ac (HAVE_AS_INDIRECT_BRANCH_VIA_GOT): New.  Defined
if 32-bit assembler generates a known relocation which is
different from R_386_GOT32.
* config.in: Regenerated.
* configure: Likewise.
* config/i386/i386.c (ix86_output_call_insn):  Extend -fno-plt
to normal non-PIC branches.
---
 gcc/config.in  | 14 ++
 gcc/config/i386/i386.c | 42 --
 gcc/configure  | 47 ++-
 gcc/configure.ac   | 18 +-
 4 files changed, 113 insertions(+), 8 deletions(-)

diff --git a/gcc/config.in b/gcc/config.in
index daaf906..0ee5c38 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -363,6 +363,12 @@
 #endif
 
 
+/* Define true if the assembler supports 'call *foo@GOT'. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_AS_INDIRECT_BRANCH_VIA_GOT
+#endif
+
+
 /* Define if your assembler supports the Sun syntax for cmov. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_AS_IX86_CMOV_SUN_SYNTAX
@@ -686,8 +692,8 @@
 #endif
 
 
-/* Define to 1 if we found a declaration for 'basename', otherwise define to
-   0. */
+/* Define to 1 if you have the declaration of `basename(const char*)', and to
+   0 if you don't. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_DECL_BASENAME
 #endif
@@ -963,8 +969,8 @@
 #endif
 
 
-/* Define to 1 if we found a declaration for 'strstr', otherwise define to 0.
-   */
+/* Define to 1 if you have the declaration of `strstr(const char*,const
+   char*)', and to 0 if you don't. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_DECL_STRSTR
 #endif
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index e77cd04..5ca19f2 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -25611,7 +25611,26 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op)
   if (SIBLING_CALL_P (insn))
 {
   if (direct_p)
-   xasm = "%!jmp\t%P0";
+   {
+ if (!flag_plt
+ && !flag_pic
+ && !TARGET_MACHO
+ && !TARGET_SEH
+ && !TARGET_PECOFF)
+   {
+ /* Avoid PLT.  */
+ if (TARGET_64BIT)
+   xasm = "%!jmp\t*%p0@GOTPCREL(%%rip)";
+ else
+#ifdef HAVE_AS_INDIRECT_BRANCH_VIA_GOT
+   xasm = "%!jmp\t*%p0@GOT";
+#else
+   xasm = "%!jmp\t%P0";
+#endif
+   }
+ else
+   xasm = "%!jmp\t%P0";
+   }
   /* SEH epilogue detection requires the indirect branch case
 to include REX.W.  */
   else if (TARGET_SEH)
@@ -25654,7 +25673,26 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op)
 }
 
   if (direct_p)
-xasm = "%!call\t%P0";
+{
+  if (!flag_plt
+ && !flag_pic
+ && !TARGET_MACHO
+ && !TARGET_SEH
+ && !TARGET_PECOFF)
+   {
+ /* Avoid PLT.  */
+ if (TARGET_64BIT)
+   xasm = "%!call\t*%p0@GOTPCREL(%%rip)";
+ else
+#ifdef HAVE_AS_INDIRECT_BRANCH_VIA_GOT
+   xasm = "%!call\t*%p0@GOT";
+#else
+   xasm = "%!call\t%P0";
+#endif
+   }
+  else
+   xasm = "%!call\t%P0";
+}
   else
 xasm = "%!call\t%A0";
 
diff --git a/gcc/configure b/gcc/configure
index a9a76d6..4419035 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -25361,7 +25361,7 @@ $as_echo "#define HAVE_AS_IX86_DIFF_SECT_DELTA 1" 
>>confdefs.h
 
 fi
 
-# These two are used unconditionally by i386.[ch]; it is to be defined
+# These three are used unconditionally by i386.[ch]; it is to be defined
 # to 1 if the feature is present, 0 otherwise.
 as_ix86_gotoff_in_data_opt=
 if test x$gas = xyes; then
@@ -25407,6 +25407,51 @@ cat >>confdefs.h <<_ACEOF
 _ACEOF
 
 
+as_ix86_indirect_branch_via_got_opt=
+if test x$gas = xyes; then
+  as_ix86_indirect_branch_via_got_opt="--32"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for call 
*foo@GOT" >&5
+$as_echo_n "checking assembler for call *foo@GOT... " >&6; }
+if test "${gcc_cv_as_ix86_indirect_branch_via_got+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  gcc_cv_as_ix86_indirect_branch_via_got=no
+if test $in_tree_gas = yes; then
+if test $gcc_cv_gas_vers -ge `expr \( \( 2 \* 1000 \) + 26 \) \* 1000 + 0`
+  then gcc_cv_as_ix86_indirect_branch_via_g

Re: [PATCH 3/4, libitm, sh]: Change gtm_futex_{wait,wake} to int

2015-05-28 Thread Uros Bizjak
On Thu, May 28, 2015 at 12:52 AM, Kaz Kojima  wrote:

>> * config/linux/sh/futex_bits.h (sys_futex0) Change operands
>> "op" and "val" to int.
>>
>> Untested.
>>
>> OK for mainline?
>
> OK.
>
> Although it looks obvious, I've confirmed that there is no build
> issue/regression on sh4-unknown-linux-gnu with it and the patch 1/4.

Thanks! I went ahead and committed the whole patch series to mainline SVN.

Uros.


Re: Statically-allocated objects with non-trivial ctors (was Re: [PATCH 33/35] Change use to type-based pool allocator in ira-color.c.)

2015-05-28 Thread Richard Biener
On May 28, 2015 7:06:36 PM GMT+02:00, Jeff Law  wrote:
>On 05/28/2015 04:42 AM, David Malcolm wrote:
>>
>> Am I right in thinking that this is a statically-allocated object
>with a
>> non-trivial constructor?  i.e. that this constructor has to run
>before
>> "main" is entered?
>>
>> Do our coding guidelines allow for this?  (I've been burned by this
>> before, on a buggy C++ runtime that didn't manage to support these).
>> I'm a little nervous about this, touching global state before
>> "main" (e.g. from the point-of-view of the JIT), though I don't know
>yet
>> if this is just a gut reaction, or if there's a valid concern here
>(I'm
>> officially on holiday this week, so I haven't had a chance to dig
>deeply
>> into these patches yet, sorry).
>That idiom is used in various places by Martin's patches.   I didn't
>see 
>a strong rhyme or reason behind why it was used over allocating 
>something in automatic or heap storage.
>
>As to supporting it, I'm not terribly concerned about other buggy C++ 
>runtimes.  GCC bootstraps with GCC, which means we've got our C++ 
>runtime.  The only worry becomes the low level bits that we build our 
>static ctor/dtor support on top of -- and I haven't seen major problems
>
>with that for eons.

But we've been trying to avoid this. And the jit might not be too happy about 
it either.

>jeff




Re: Statically-allocated objects with non-trivial ctors (was Re: [PATCH 33/35] Change use to type-based pool allocator in ira-color.c.)

2015-05-28 Thread Jakub Jelinek
On Thu, May 28, 2015 at 07:57:39PM +0200, Richard Biener wrote:
> But we've been trying to avoid this. And the jit might not be too happy about 
> it either.

Yeah, we should certainly try to avoid them, especially if it would affect
many variables having to be constructed.

Jakub


Re: [Patch, fortran, PR44672, v6] [F08] ALLOCATE with SOURCE and no array-spec

2015-05-28 Thread Mikael Morin
Le 28/05/2015 17:29, Andre Vehreschild a écrit :
> *** resolve_allocate_expr (gfc_expr *e, gfc_
> *** 7103,7112 
> --- 7103,7123 
> if (!ref2 || ref2->type != REF_ARRAY || ref2->u.ar.type == AR_FULL
> || (dimension && ref2->u.ar.dimen == 0))
>   {
> +   /* F08:C633.  */
> +   if (code->expr3)
> + {
> +   if (!gfc_notify_std (GFC_STD_F2008, "Array specification required "
> +"in ALLOCATE statement at %L", &e->where))
> + goto failure;
> +   *array_alloc_wo_spec = true;
> + }
> +   else
> + {
> gfc_error ("Array specification required in ALLOCATE statement "
>"at %L", &e->where);
> goto failure;
>   }
> + }
>   
> /* Make sure that the array section reference makes sense in the
>context of an ALLOCATE specification.  */
I think we can be a little bit more user friendly with the gfc_notify_std
error message.
Something like:
ALLOCATE without array spec at %L
ALLOCATE with array bounds determined from SOURCE or MOLD at %L

> *** gfc_array_init_size (tree descriptor, in
> *** 5044,5053 
>lower == NULL=> lbound = 1, ubound = upper[n]
>upper[n] = NULL  => lbound = 1, ubound = lower[n]
>upper[n] != NULL => lbound = lower[n], ubound = upper[n]  */
> -   ubound = upper[n];
>   
> /* Set lower bound.  */
> gfc_init_se (&se, NULL);
> if (lower == NULL)
>   se.expr = gfc_index_one_node;
> else
> --- 5050,5063 
>lower == NULL=> lbound = 1, ubound = upper[n]
>upper[n] = NULL  => lbound = 1, ubound = lower[n]
>upper[n] != NULL => lbound = lower[n], ubound = upper[n]  */
>   
> /* Set lower bound.  */
> gfc_init_se (&se, NULL);
> +   if (expr3_desc != NULL_TREE)
> + se.expr = gfc_index_one_node;
> +   else
> + {
> +   ubound = upper[n];
> if (lower == NULL)
>   se.expr = gfc_index_one_node;
> else
> *** gfc_array_init_size (tree descriptor, in
> *** 5064,5069 
> --- 5074,5080 
> ubound = lower[n];
>   }
>   }
> + }
> gfc_conv_descriptor_lbound_set (descriptor_block, descriptor,
> gfc_rank_cst[n], se.expr);
> conv_lbound = se.expr;
You can avoid reindenting if the ubound = upper[n] statement is kept at
its original place.

> *** gfc_array_init_size (tree descriptor, in
> *** 5076,5085 
>   
> /* Set upper bound.  */
> gfc_init_se (&se, NULL);
> gcc_assert (ubound);
> gfc_conv_expr_type (&se, ubound, gfc_array_index_type);
> gfc_add_block_to_block (pblock, &se.pre);
> ! 
> gfc_conv_descriptor_ubound_set (descriptor_block, descriptor,
> gfc_rank_cst[n], se.expr);
> conv_ubound = se.expr;
> --- 5087,5111 
>   
> /* Set upper bound.  */
> gfc_init_se (&se, NULL);
> +   if (expr3_desc != NULL_TREE)
> + {
> +   /* Set the upper bound to be (desc.ubound - desc.lbound)+ 1.  */
> +   tmp = fold_build2_loc (input_location, MINUS_EXPR,
> +  gfc_array_index_type,
> +  gfc_conv_descriptor_ubound_get (
> +expr3_desc, gfc_rank_cst[n]),
> +  gfc_conv_descriptor_lbound_get (
> +expr3_desc, gfc_rank_cst[n]));
> +   se.expr = fold_build2_loc (input_location, PLUS_EXPR,
> +  gfc_array_index_type, tmp,
> +  gfc_index_one_node);
> + }
> +   else
> + {
> gcc_assert (ubound);
> gfc_conv_expr_type (&se, ubound, gfc_array_index_type);
> gfc_add_block_to_block (pblock, &se.pre);
> ! }
> gfc_conv_descriptor_ubound_set (descriptor_block, descriptor,
> gfc_rank_cst[n], se.expr);
> conv_ubound = se.expr;
Your one-based-ness problem was here, wasn't it?
I would rather copy directly lbound and ubound from expr3_desc to
descriptor.
If the source has non-one-based bounds, the above would produce wrong
bounds.

> *** gfc_trans_allocate (gfc_code * code)
> *** 5174,5185 
>   {
> if (!code->expr3->mold
> || code->expr3->ts.type == BT_CHARACTER
> !   || vtab_needed)
>   {
> /* Convert expr3 to a tree.  */
> gfc_init_se (&se, NULL);
> !   /* For all "simple" expression just get the descriptor or the
> !  reference, respectively, depending on the rank of the expr.  */
> if (code->expr3->rank != 0)
>   gfc_conv_expr_descriptor (&se, code->expr3);
> else
> --- 5175,5195 
>   {
> if (!code->expr3->mold
> || code->expr3->ts.ty

[patch] fix bootstrap on FreeBSD i386/arm

2015-05-28 Thread Andreas Tobler

All,

This patch restores bootstrap on i386-*-freebsd*.
The build was failing after the introduction of -std=c++98 
configure/build flag. The -std=c++98 enables strict_ansi and on FreeBSD 
the libc function atoll is not defined for this.


But the configure always stated atoll available.

A bit of debugging showed that, to my understanding now, AC_CHECK_FUNCS only
checks if a func is available, nothing more, while gcc_AC_CHECK_DECLS
really recognises the build flags and tells me that atoll w/o -std=c++98
is available and with -std=c++98 it is not available.


So, the below patch addresses this and restores bootstrap on FreeBSD.

Ok for trunk?

Thanks,
Andreas

2015-05-28  Andreas Tobler  

* configure.ac: Move the atoll check from AC_CHECK_FUNCS to
gcc_AC_CHECK_DECLS.
* configure: Regenerate.

Index: configure.ac
===
--- configure.ac(revision 223845)
+++ configure.ac(working copy)
@@ -1149,7 +1149,7 @@
   fileno_unlocked fprintf_unlocked fputc_unlocked fputs_unlocked dnl
   fread_unlocked fwrite_unlocked getchar_unlocked getc_unlocked dnl
   putchar_unlocked putc_unlocked)
-AC_CHECK_FUNCS(times clock kill getrlimit setrlimit atoll atoq \
+AC_CHECK_FUNCS(times clock kill getrlimit setrlimit atoq \
popen sysconf strsignal getrusage nl_langinfo \
gettimeofday mbstowcs wcswidth mmap setlocale \
gcc_UNLOCKED_FUNCS madvise)
@@ -1213,7 +1213,7 @@
 #include "ansidecl.h"
 #include "system.h"])
 
-gcc_AC_CHECK_DECLS(getenv atol asprintf sbrk abort atof getcwd getwd \
+gcc_AC_CHECK_DECLS(getenv atol atoll asprintf sbrk abort atof getcwd getwd \
madvise stpcpy strnlen strsignal strverscmp \
strtol strtoul strtoll strtoull \
errno snprintf vsnprintf vasprintf malloc realloc calloc \


Re: [PATCH v3] libiberty: cleanup Makefile.in

2015-05-28 Thread Bernhard Reutner-Fischer
On 28 May 2015 at 17:48, Ian Lance Taylor  wrote:
> On Thu, May 28, 2015 at 1:57 AM, Bernhard Reutner-Fischer
>  wrote:
>>
>> +stamp-pic-ofiles: $(CFILES:%=$(srcdir)/%)
>
> To the best of my knowledge, in POSIX make variable substitutions of
> this form do not recognize % specially.  In POSIX make this kind of
> substitution can only be used to change the file extension.  Also,
> POSIX make does not permit a variable expansion in the right hand side
> of the substitution.

Pity. bmake (a port from some BSD make) does support it.

Either way, i'm withdrawing these patches and the idea to attempt to make
the pic/ and noasan/ handling prettier.

thanks anyway!

cheers,


Re: [RFC][PATCH][X86_64] Eliminate PLT stubs for specified external functions via -fno-plt=

2015-05-28 Thread Sriraman Tallam
I have attached a patch that adds the new attribute "noplt".  Please review.

* config/i386/i386.c (avoid_plt_to_call): New function.
(ix86_output_call_insn): Generate indirect call for functions
marked with "noplt" attribute.
(attribute_spec ix86_attribute_): Define new attribute "noplt".
* doc/extend.texi: Document new attribute "noplt".
* gcc.target/i386/noplt-1.c: New testcase.
* gcc.target/i386/noplt-2.c: New testcase.



Thanks
Sri

On Fri, May 22, 2015 at 2:00 AM, Pedro Alves  wrote:
> On 05/21/2015 11:02 PM, Sriraman Tallam wrote:
>> On Thu, May 21, 2015 at 2:51 PM, Pedro Alves  wrote:
>>> On 05/21/2015 10:12 PM, Sriraman Tallam wrote:

 My original proposal, for x86_64 only, was to add
 -fno-plt=. This lets the user decide for which
 functions PLT must be avoided.  Let the compiler always generate an
 indirect call using call *func@GOTPCREL(%rip).  We could do this for
 non-PIC code too.  No need for linker fixups since this relies on the
 user to know that func is from a shared object.
>>>
>>> Having to pass function names on the command line seems like an odd
>>> interface.  E.g, you'll need to pass the mangled name for
>>> C++ functions.  Any reason this isn't a function attribute?
>>
>> It is not clear to me where I would stick the attribute.  Example
>> usage in foo.cc:
>>
>> #include <string.h>
>>
>> int main() {
>>   int n = memcmp();
>> }
>>
>> I want memcmp to not go through PLT, do you propose explicitly
>> re-declaring it in foo.cc with the attribute?
>
> I guess you'd do:
>
> #include <string.h>
>
> __attribute__((no_plt)) typeof (memcpy) memcpy;
>
> int main() {
>   int n = memcmp();
> }
>
> or even:
>
> #include <string.h>
>
> int main() {
>   if (hotpath) {
> __attribute__((no_plt)) typeof (memcpy) memcpy;
> for (..) {
>   int n = memcmp();
> }
>   } else {
>   int n = memcmp();
>   }
> }
>
> or globally:
>
> $ cat no-plt/string.h:
> #include_next <string.h>
> __attribute__((no_plt)) typeof (memcpy) memcpy;
>
> $ gcc -I no-plt/ ...
>
> Thanks,
> Pedro Alves
>
* config/i386/i386.c (avoid_plt_to_call): New function.
(ix86_output_call_insn): Generate indirect call for functions
marked with "noplt" attribute.
(attribute_spec ix86_attribute_): Define new attribute "noplt".
* doc/extend.texi: Document new attribute "noplt".
* gcc.target/i386/noplt-1.c: New testcase.
* gcc.target/i386/noplt-2.c: New testcase.

Index: config/i386/i386.c
===
--- config/i386/i386.c  (revision 223720)
+++ config/i386/i386.c  (working copy)
@@ -25599,6 +25599,25 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx call
   return call;
 }
 
+/* Return true if the function being called was marked with attribute
+   "noplt".  If this function is defined, this should return false.  */
+static bool
+avoid_plt_to_call (rtx call_op)
+{
+  if (GET_CODE (call_op) != SYMBOL_REF
+  || SYMBOL_REF_LOCAL_P (call_op))
+return false;
+
+  tree symbol_decl = SYMBOL_REF_DECL (call_op);
+
+  if (symbol_decl != NULL_TREE
+  && TREE_CODE (symbol_decl) == FUNCTION_DECL
+  && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl)))
+return true;
+
+  return false;
+}
+
 /* Output the assembly for a call instruction.  */
 
 const char *
@@ -25611,7 +25630,12 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op
   if (SIBLING_CALL_P (insn))
 {
   if (direct_p)
-   xasm = "%!jmp\t%P0";
+   {
+ if (TARGET_64BIT && avoid_plt_to_call (call_op))
+   xasm = "jmp\t*%p0@GOTPCREL(%%rip)";
+ else
+   xasm = "jmp\t%P0";
+   }
   /* SEH epilogue detection requires the indirect branch case
 to include REX.W.  */
   else if (TARGET_SEH)
@@ -25654,7 +25678,12 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op
 }
 
   if (direct_p)
-xasm = "%!call\t%P0";
+{
+  if (TARGET_64BIT && avoid_plt_to_call (call_op))
+xasm = "call\t*%p0@GOTPCREL(%%rip)";
+  else
+xasm = "call\t%P0";
+}
   else
 xasm = "%!call\t%A0";
 
@@ -46628,6 +46657,9 @@ static const struct attribute_spec ix86_attribute_
 false },
   { "callee_pop_aggregate_return", 1, 1, false, true, true,
 ix86_handle_callee_pop_aggregate_return, true },
+  /* Attribute to avoid calling function via PLT.  */
+  { "noplt", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
+false },
   /* End element.  */
   { NULL,0, 0, false, false, false, NULL, false }
 };
Index: doc/extend.texi
===
--- doc/extend.texi (revision 223720)
+++ doc/extend.texi (working copy)
@@ -4858,6 +4858,13 @@ On x86-32 targets, the @code{stdcall} attribute ca
 assume that the called function pops off the stack space used to
 pass arguments, unless it takes a variable number of arguments.
 
+@item noplt
+@cindex @code{noplt} function attribute, x86-64
+@cindex functions 

Re: [RFC][PATCH][X86_64] Eliminate PLT stubs for specified external functions via -fno-plt=

2015-05-28 Thread H.J. Lu
On Thu, May 28, 2015 at 11:34 AM, Sriraman Tallam  wrote:
> I have attached a patch that adds the new attribute "noplt".  Please review.
>
> * config/i386/i386.c (avoid_plt_to_call): New function.
> (ix86_output_call_insn): Generate indirect call for functions
> marked with "noplt" attribute.
> (attribute_spec ix86_attribute_): Define new attribute "noplt".
> * doc/extend.texi: Document new attribute "noplt".
> * gcc.target/i386/noplt-1.c: New testcase.
> * gcc.target/i386/noplt-2.c: New testcase.
>

2 comments:

1. Don't remove "%!" prefix before call/jmp.  It is needed for MPX.
2. Don't you need to check

  && !TARGET_MACHO
  && !TARGET_SEH
  && !TARGET_PECOFF

since it only works for ELF.

-- 
H.J.


Re: Statically-allocated objects with non-trivial ctors (was Re: [PATCH 33/35] Change use to type-based pool allocator in ira-color.c.)

2015-05-28 Thread Martin Liška

On 05/28/2015 08:03 PM, Jakub Jelinek wrote:

On Thu, May 28, 2015 at 07:57:39PM +0200, Richard Biener wrote:

But we've been trying to avoid this. And the jit might not be too happy about 
it either.


Yeah, we should certainly try to avoid them, especially if it would affect
many variables having to be constructed.

Jakub



Ok, thus I will do it as before my modifications:

static pool_allocator <update_cost_record> *update_cost_record_pool = NULL;

/* Initiate update cost records.  */
static void
init_update_cost_records (void)
{
  update_cost_record_pool = new pool_allocator <update_cost_record>
    ("update cost records", 100);
}

I'm going to migrate rest of patches that use the same construct.

Thanks,
Martin



Re: [RFC][PATCH][X86_64] Eliminate PLT stubs for specified external functions via -fno-plt=

2015-05-28 Thread Sriraman Tallam
On Thu, May 28, 2015 at 11:42 AM, H.J. Lu  wrote:
> On Thu, May 28, 2015 at 11:34 AM, Sriraman Tallam  wrote:
>> I have attached a patch that adds the new attribute "noplt".  Please review.
>>
>> * config/i386/i386.c (avoid_plt_to_call): New function.
>> (ix86_output_call_insn): Generate indirect call for functions
>> marked with "noplt" attribute.
>> (attribute_spec ix86_attribute_): Define new attribute "noplt".
>> * doc/extend.texi: Document new attribute "noplt".
>> * gcc.target/i386/noplt-1.c: New testcase.
>> * gcc.target/i386/noplt-2.c: New testcase.
>>
>
> 2 comments:
>
> 1. Don't remove "%!" prefix before call/jmp.  It is needed for MPX.
> 2. Don't you need to check
>
>   && !TARGET_MACHO
>   && !TARGET_SEH
>   && !TARGET_PECOFF
>
> since it only works for ELF.

Ok, I will make this change. OTOH, is it just better to piggy-back on
existing -fno-plt change by Alex in calls.c
and do this:

Index: calls.c
===
--- calls.c (revision 223720)
+++ calls.c (working copy)
@@ -226,9 +226,11 @@ prepare_call_address (tree fndecl_or_type, rtx fun
&& targetm.small_register_classes_for_mode_p (FUNCTION_MODE))
   ? force_not_mem (memory_address (FUNCTION_MODE, funexp))
   : memory_address (FUNCTION_MODE, funexp));
-  else if (flag_pic && !flag_plt && fndecl_or_type
+  else if (fndecl_or_type
&& TREE_CODE (fndecl_or_type) == FUNCTION_DECL
-   && !targetm.binds_local_p (fndecl_or_type))
+   && !targetm.binds_local_p (fndecl_or_type)
+   && ((flag_pic && !flag_plt)
+   || (lookup_attribute ("noplt", DECL_ATTRIBUTES(fndecl_or_type)
 {
   funexp = force_reg (Pmode, funexp);
 }


Thanks
Sri

>
> --
> H.J.


Re: [patch 10/10] debug-early merge: compiler proper

2015-05-28 Thread Aldy Hernandez

On 05/27/2015 08:39 AM, Jason Merrill wrote:

On 05/20/2015 11:50 AM, Aldy Hernandez wrote:



+  /* Fill in the size of variable-length fields in late dwarf.  */
+  if (TREE_ASM_WRITTEN (type)
+  && !early_dwarf_dumping)
+{
+  tree member;
+  for (member = TYPE_FIELDS (type); member; member = DECL_CHAIN
(member))
+fill_variable_array_bounds (TREE_TYPE (member));
+  return;
+}


Why is this happening in late dwarf?  I'm concerned that front-end
information that is necessary to do this might be lost by that point.


I thought only after the optimizations had run their course would we be 
guaranteed to have accurate bound information.  At least, that's what my 
experience showed.


Do you have something else in mind?




+  /* Variable-length types may be incomplete even if
+ TREE_ASM_WRITTEN.  For such types, fall through to
+ gen_array_type_die() and possibly fill in
+ DW_AT_{upper,lower}_bound attributes.  */
+  if ((TREE_CODE (type) != ARRAY_TYPE
+   && TREE_CODE (type) != RECORD_TYPE
+   && TREE_CODE (type) != UNION_TYPE
+   && TREE_CODE (type) != QUAL_UNION_TYPE)
+  || (TYPE_SIZE (type)
+  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST))


Similarly, why check for INTEGER_CST here?


The INTEGER_CST check was supposed to mean "we have bound information 
already, no need to look further".


I guess we could have a variable length bound that does not decay to a 
constant.  Perhaps I could check the presence of a cached DIE with a 
type DIE containing a DW_TAG_subrange_type *and* 
DW_AT_{lower,upper}_bound ??.  Basically I just want to add bound 
information, if available and not already present.


Suggestions?

Aldy


Re: [RFC][PATCH][X86_64] Eliminate PLT stubs for specified external functions via -fno-plt=

2015-05-28 Thread H.J. Lu
On Thu, May 28, 2015 at 11:50 AM, Sriraman Tallam  wrote:
> On Thu, May 28, 2015 at 11:42 AM, H.J. Lu  wrote:
>> On Thu, May 28, 2015 at 11:34 AM, Sriraman Tallam  
>> wrote:
>>> I have attached a patch that adds the new attribute "noplt".  Please review.
>>>
>>> * config/i386/i386.c (avoid_plt_to_call): New function.
>>> (ix86_output_call_insn): Generate indirect call for functions
>>> marked with "noplt" attribute.
>>> (attribute_spec ix86_attribute_): Define new attribute "noplt".
>>> * doc/extend.texi: Document new attribute "noplt".
>>> * gcc.target/i386/noplt-1.c: New testcase.
>>> * gcc.target/i386/noplt-2.c: New testcase.
>>>
>>
>> 2 comments:
>>
>> 1. Don't remove "%!" prefix before call/jmp.  It is needed for MPX.
>> 2. Don't you need to check
>>
>>   && !TARGET_MACHO
>>   && !TARGET_SEH
>>   && !TARGET_PECOFF
>>
>> since it only works for ELF.
>
> Ok, I will make this change. OTOH, is it just better to piggy-back on
> existing -fno-plt change by Alex in calls.c
> and do this:
>
> Index: calls.c
> ===
> --- calls.c (revision 223720)
> +++ calls.c (working copy)
> @@ -226,9 +226,11 @@ prepare_call_address (tree fndecl_or_type, rtx fun
> && targetm.small_register_classes_for_mode_p (FUNCTION_MODE))
>? force_not_mem (memory_address (FUNCTION_MODE, funexp))
>: memory_address (FUNCTION_MODE, funexp));
> -  else if (flag_pic && !flag_plt && fndecl_or_type
> +  else if (fndecl_or_type
> && TREE_CODE (fndecl_or_type) == FUNCTION_DECL
> -   && !targetm.binds_local_p (fndecl_or_type))
> +   && !targetm.binds_local_p (fndecl_or_type)
> +   && ((flag_pic && !flag_plt)
> +   || (lookup_attribute ("noplt", DECL_ATTRIBUTES(fndecl_or_type)
>  {
>funexp = force_reg (Pmode, funexp);
>  }
>

Does it work on non-PIC calls?

-- 
H.J.


Re: [PATCH][ARM/AArch64 Testsuite] Cleanup advsimd-intrinsics.exp, removing unnecessary loop

2015-05-28 Thread Christophe Lyon
On 28 May 2015 at 18:45, Alan Lawrence  wrote:
> I've tested this on aarch64, aarch64_be, and arm, and in all cases, the same
> tests are executed (whether running the whole advsimd-intrinsics.exp, or
> manually specifying a single file). AFAICT the loop, explicit
> runtest_file_p, and gcc_set_parallelization_enable, all stem from a point
> where we were calling c-torture-execute instead of or as well as
> gcc-dg-runtest. Now we have only the latter, it is quite capable of looping
> through tests itself (and correctly parallelizing them).
>
> Ok for trunk?

This looks OK, but why can't you also drop the other torture-related
lines as you did in your previous patch?
I mean:
load_lib c-torture.exp
load_lib torture-options.exp
etc...

Christophe.

>
> Cheers, Alan
>
>
> diff --git
> a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrins
> index 583832a..19a982d 100644
> ---
> a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp
> +++
> b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp
> @@ -55,20 +55,8 @@ set-torture-options $C_TORTURE_OPTIONS {{}}
> $LTO_TORTURE_OPTI
>  set additional_flags [add_options_for_arm_neon ""]
>
>  # Main loop.
> -foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.c]] {
> -# If we're only testing specific files and this isn't one of them, skip
> it.
> -if ![runtest_file_p $runtests $src] then {
> -   continue
> -}
> -
> -# runtest_file_p is already run above, and the code below can run
> -# runtest_file_p again, make sure everything for this test is
> -# performed if the above runtest_file_p decided this runtest
> -# instance should execute the test
> -gcc_parallel_test_enable 0
> -gcc-dg-runtest $src "" $additional_flags
> -gcc_parallel_test_enable 1
> -}
> +gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \
> +  "" ${additional_flags}
>
>  # All done.
>  set dg-do-what-default ${save-dg-do-what-default}
>


Re: [PATCH] Optimize (CST1 << A) == CST2 (PR tree-optimization/66299)

2015-05-28 Thread Marc Glisse

On Thu, 28 May 2015, Marek Polacek wrote:


This PR points out that we weren't able to optimize 1 << x == 2 to just
x == 1.


Side note: if we are looking for extra patterns to simplify, llvm has an 
almost unlimited supply. Here are a few we don't seem to have (there are 
more where those came from), of course several need constraining / 
generalizing, it is just a list of hints I wrote for myself.


(A|B) & ~(A&B) -> A^B
(A | B) & ((~A) ^ B) -> (A & B)
(A & (~B)) | (A ^ B) -> (A ^ B)
((B | C) & A) | B -> B | (A & C)
A | ( A ^ B) -> A |  B
A | (~A ^ B) -> A | ~B
(A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C
(A ^ B) | ((B ^ C) ^ A) -> (A ^ B) | C
(A & B) | (A ^ B) -> (A | B)
A | ~(A ^ B) -> A | ~B
(A & B) | ((~A) ^ B) -> (~A ^ B)
~(~X & Y) -> (X | ~Y)
~(~X >>s Y) -> (X >>s Y)
(A & B)^(A | B) -> A ^ B
(A | ~B) ^ (~A | B) -> A ^ B
(A & ~B) ^ (~A & B) -> A ^ B
(A ^ C)^(A | B) -> ((~A) & B) ^ C
(A & B) ^ (A ^ B) -> (A | B)
(A & ~B) ^ (~A) -> ~(A & B)
(A&B)+(A^B) -> A|B
(A&B)+(A|B) -> A+B
(A|B)-(A^B) -> A&B
((X | Y) - X) -> (~X & Y)
fmax(x,NaN) -> x
fmax(a,fmax(a,b)) -> fmax(a,b)
(X+2) >u X -> x  X <= 4
((X & ~7) == 0) -> X < 8
2 * X < 5 -> X <= 2
((1 << x)&8) == 0 -> x != 3
((1 << x)&7) == 0 -> x > 2
Y - Z < X - Z -> Y < X
3 * X == 3 * Y -> X == Y
A >> 3 == B >> 3 -> (A ^ B) < 8
(float)int <= 4.4 -> int <= 4
x unle x -> x ord x



+/* (CST1 << A) == CST2 -> A == log2 (CST2 / CST1)
+   (CST1 << A) != CST2 -> A != log2 (CST2 / CST1)
+   if CST2 is a multiple of CST1.  */
+(for cmp (ne eq)
+ (simplify
+  (cmp (lshift@3 INTEGER_CST@0 @1) INTEGER_CST@2)
+  (if ((TREE_CODE (@3) != SSA_NAME || has_single_use (@3))
+   && wi::multiple_of_p (@2, @0, TYPE_SIGN (type)))


Doesn't "type" refer to the result of the EQ_EXPR here?


On Thu, 28 May 2015, Jakub Jelinek wrote:


Is CST2 a multiple of CST1 the best test though?
I mean say in
(0x8001U << x) == 0x2U
0x2U isn't a multiple of 0x8001U, yet there is only one
valid value of x for which it holds (17), so we could very well
optimize that to x == 17.
If popcount of the CST1 is 1, then multiple_of_p is supposedly sufficient
(have you checked if CST1 is negative that it still works?), for others
supposedly we could have a helper function that would just try
in a loop all shift counts from 0 to precision - 1, and note when
(CST1 << b) == CST2 - if for no b, then it should fold regardless of
has_single_use to false or true, if for exactly one shift count, then
use a comparison against that shift count, otherwise give up?


ctz(CST2)-ctz(CST1) should provide a single candidate without looping. 
ctz(CST1) is also relevant when CST2==0.


--
Marc Glisse


Re: Do less generous pointer globbing in alias.c

2015-05-28 Thread Jan Hubicka
hello,
while providing you the testcase showing why I need the transitive closure of
"contains pointer" via the extra child, I noticed that there is an extra symmetry to handle:

 struct a {void *ptr;}
 char **ptr = (char **)&a.ptr;
 ptr = ...

This one doesn't really fly with my extra subset code, because ptr is not
a universal pointer, but struct a contains one and thus should conflict with
every pointer.  Adding every pointer as a subset of every structure with
a universal pointer is impractical (children of those structures would keep appearing
as new pointer types get alias sets) and thus indeed it is better to handle it
the same way as alias set 0 - by a special case in alias_set_subset_of
and alias_sets_conflict_p.

So I added the second flag - has_pointer that is transitive closure of
is_pointer and added the special case to alias_sets_conflict_p instead of 
adding the extra subset relation into the DAG.

I also added statistics and made the changes you suggested (allowing the child
hash to be NULL and cleaning up alias set conflict construction).

I also constructed a testcase that covers all the new code paths.

The patch bootstrapped/regtested ppc64-linux.  I am not bound to teaching
next week, so if I hear no negative comments, I will schedule committing the
patch for the weekend to deal with possible fallout.

There are a few cleanups possible incrementally - i.e. the hash set seems
irrationally large for an average type, we could avoid some pointer travelling
overhead and we could also do better at alias_sets_must_conflict_p.

Honza

* alias.c (alias_set_entry_d): Add is_pointer and has_pointer.
(alias_stats): Add num_universal.
(alias_set_subset_of): Special case pointers; be ready for NULL
children.
(alias_sets_conflict_p): Special case pointers; be ready for NULL
children.
(init_alias_set_entry): Break out from ...
(record_alias_subset): ... here; propagate new fields;
allocate children only when really needed.
(get_alias_set): Do less generous pointer globbing.
(dump_alias_stats_in_alias_c): Update statistics.
* gcc.dg/alias-8.c: Do not xfail.
* gcc.dg/pr62167.c: Prevent FRE.
* gcc.dg/alias-14.c: New testcase.
Index: alias.c
===
--- alias.c (revision 223772)
+++ alias.c (working copy)
@@ -183,10 +184,6 @@ struct GTY(()) alias_set_entry_d {
   /* The alias set number, as stored in MEM_ALIAS_SET.  */
   alias_set_type alias_set;
 
-  /* Nonzero if would have a child of zero: this effectively makes this
- alias set the same as alias set zero.  */
-  int has_zero_child;
-
   /* The children of the alias set.  These are not just the immediate
  children, but, in fact, all descendants.  So, if we have:
 
@@ -195,6 +192,17 @@ struct GTY(()) alias_set_entry_d {
  continuing our example above, the children here will be all of
  `int', `double', `float', and `struct S'.  */
   hash_map *children;
+
+  /* Nonzero if would have a child of zero: this effectively makes this
+ alias set the same as alias set zero.  */
+  bool has_zero_child;
+  /* Nonzero if alias set corresponds to pointer type itself (i.e. not to
+ aggregate contaiing pointer.
+ This is used for a special case where we need an universal pointer type
+ compatible with all other pointer types.  */
+  bool is_pointer;
+  /* Nonzero if is_pointer or if one of childs have has_pointer set.  */
+  bool has_pointer;
 };
 typedef struct alias_set_entry_d *alias_set_entry;
 
@@ -222,6 +230,7 @@ static struct {
   unsigned long long num_same_objects;
   unsigned long long num_volatile;
   unsigned long long num_dag;
+  unsigned long long num_universal;
   unsigned long long num_disambiguated;
 } alias_stats;
 
@@ -454,18 +463,58 @@ mems_in_disjoint_alias_sets_p (const_rtx
 bool
 alias_set_subset_of (alias_set_type set1, alias_set_type set2)
 {
-  alias_set_entry ase;
+  alias_set_entry ase2;
 
   /* Everything is a subset of the "aliases everything" set.  */
   if (set2 == 0)
 return true;
 
-  /* Otherwise, check if set1 is a subset of set2.  */
-  ase = get_alias_set_entry (set2);
-  if (ase != 0
-  && (ase->has_zero_child
- || ase->children->get (set1)))
+  /* Check if set1 is a subset of set2.  */
+  ase2 = get_alias_set_entry (set2);
+  if (ase2 != 0
+  && (ase2->has_zero_child
+ || (ase2->children && ase2->children->get (set1
 return true;
+
+  /* As a special case we consider alias set of "void *" to be both subset
+ and superset of every alias set of a pointer.  This extra symmetry does
+ not matter for alias_sets_conflict_p but it makes 
aliasing_component_refs_p
+ to return true on the following testcase:
+
+ void *ptr;
+ char **ptr2=(char **)&ptr;
+ *ptr2 = ...
+
+ Additionally if a set contains universal pointer, we consider every 
pointer
+ to be a subset of it, but we do not represent

Re: Statically-allocated objects with non-trivial ctors (was Re: [PATCH 33/35] Change use to type-based pool allocator in ira-color.c.)

2015-05-28 Thread Trevor Saunders
On Thu, May 28, 2015 at 08:47:16PM +0200, Martin Liška wrote:
> On 05/28/2015 08:03 PM, Jakub Jelinek wrote:
> >On Thu, May 28, 2015 at 07:57:39PM +0200, Richard Biener wrote:
> >>But we've been trying to avoid this. And the jit might not be too happy 
> >>about it either.
> >
> >Yeah, we should certainly try to avoid them, especially if it would affect
> >many variables having to be constructed.
> >
> > Jakub
> >
> 
> Ok, thus I will do it as before my modifications:
> 
> static pool_allocator  *update_cost_record_pool = NULL;
> 
> /* Initiate update cost records.  */
> static void
> init_update_cost_records (void)
> {
>  update_cost_record_pool = new pool_allocator 
>("update cost records", 100);
> }
> 
> I'm going to migrate rest of patches that use the same construct.


Hrm, why not just change pool_allocator so it does the first allocation
on the first alloc and just initializes everything to null / 0?  Then
the ctor would be close to trivial.  Then if you really care about the
stuff gcc doesn't optimize away you could add a special class
static_pool_allocator (you might also need to hack in a way to get the
c++ fe to do constexpr / defaulted functions).

Trev

> 
> Thanks,
> Martin
> 


Re: [patch] fix bootstrap on FreeBSD i386/arm

2015-05-28 Thread Jason Merrill

OK, thanks.

Jason


Re: debug mode maintenance patch

2015-05-28 Thread François Dumont

On 25/05/2015 20:41, Jonathan Wakely wrote:

On 25/05/15 15:31 +0200, François Dumont wrote:

Hi

   This is a patch to clean the debug mode code.

   I have introduced a new debug header, assertions.h, so that 
headers that only need _GLIBCXX_DEBUG_ASSERT do not have to include 
the big debug.h. I also introduce functions.tcc to isolate 
implementation of __foreign_iterator which require a number of other 
headers.


All other uses of .tcc extensions are headers included automatically
at the bottom of the corresponding .h header, so other headers never
need to do #include  because that appears at the end of
 (or  for standard headers).

Also, .tcc is meant to be for definitions of non-inline templates that
are declared (but not defined) in the header that includes the .tcc
file, but all the functions you're moving to  are
still small and inline.

If the point is just to move some functions to a different header
because not all consumers of functions.h need those functions then I
think it should have a different name, not pretend to be related to
functions.h


Ok, then I kept __foreign_iterator within functions.h. I checked and 
none of the headers it includes are themselves including debug headers; as 
long as it stays like that, it is fine.




I'm not convinced moving them to a separate header is even a good
idea. Surely most headers that include  already end
up including  and  anyway?



   * include/debug/debug.h ([_GLIBCXX_DEBUG_ASSERT,
   _GLIBCXX_DEBUG_PEDASSERT, _GLIBCXX_DEBUG_ONLY]): Move definition...


These names should not be in square brackets (square brackets are used
to indicate conditional changes, see
http://www.gnu.org/prep/standards/html_node/Conditional-Changes.html)


Sorry, I saw it used so many times for macros that I thought it was the 
right way to report macro modifications.


I also replicate Copyrights from debug.h to assertions.h.

* include/debug/debug.h (_GLIBCXX_DEBUG_ASSERT,
_GLIBCXX_DEBUG_PEDASSERT, _GLIBCXX_DEBUG_ONLY): Move definition...
* include/debug/assertions.h: ...here, new.
* include/debug/formatter.h
(struct _Error_formatter::_Is_iterator_value_type): New.
(struct _Error_formatter::_Is_instance): New.
(struct _Error_formatter::_Parameter): Make public and not friend
anymore.
(_Error_formatter::_Parameter::__instance): New _M_kind enum entry.
(_Error_formatter::_Parameter::__iterator_value_type): New _M_kind enum
entry.
(struct _Error_formatter::_Parameter::_Type): New.
(struct _Error_formatter::_Parameter::_Instance): New, inherit from
latter.
(union _Error_formatter::_Parameter::_M_variant): Reorganize.
(_Parameter(_Iterator const&, const char*, _Is_iterator)): Make all
overloads take iterator through a const reference.
(_Parameter(const _Iterator&, const char*, _Is_iterator_value_type)):
New.
(_Parameter(const _Type&, const char*, _Is_instance)): New.
(_Error_formatter::_M_print_type): Delete.
(_Error_formatter::_M_iterator_value_type): New.
(_Error_formatter::_M_instance): New.
* include/Makefile.am: Add new above debug file.
* include/Makefile.in: Regenerate.
* include/debug/functions.h
(__check_dereferenceable(const _Safe_iterator<>&),
__valid_range(const _Safe_iterator<>&),
struct __is_safe_random_iterator<_Safe_iterator<>>): Move...
* include/debug/safe_iterator.h: ... here.
Replace debug.h include with assertions.h.
(__check_singular_aux): Move...
* include/debug/safe_base.h: ... here.
* include/debug/functions.h
(__check_dereferenceable(const _Safe_local_iterator<>&),
__valid_range(const _Safe_local_iterator<>&): Move...
* include/debug/safe_local_iterator.h: ...here.
* include/debug/safe_sequence.h: Replace debug.h with assertions.h.
Remove _Safe_iterator declaration.
* include/debug/safe_unordered_container.h: Replace debug.h with
assertions.h.
* include/debug/array: Replace safe_sequence.h include with
formatter.h and macros.h.
* include/debug/deque: Include functions.tcc.
* include/debug/forward_list: Likewise.
* include/debug/list: Likewise.
* include/debug/string: Likewise.
* include/debug/vector: Likewise.
* include/bits/unique_ptr.h: Replace debug.h include with new
assertions.h.
* include/bits/stl_iterator_base_funcs.h: Likewise.
* testsuite/23_containers/array/tuple_interface/get_debug_neg.cc:
Adjust dg-error line number.
* testsuite/23_containers/array/tuple_interface/
tuple_element_debug_neg.cc: Likewise.
* src/c++11/debug.cc: Adapt.

Tested under Linux x86_64.

Ok to commit ?

François

Index: include/Makefile.am
===
--- include/Makefile.am	(revision 223846)
+++ include/Makefile.am	(working copy)
@@ -759,6 +759,7 @@
 debug_builddir = ./debug
 debug_headers = \
 	${debug_srcdir}/array \
+	${debug_srcdir}/assertions.h \
 	${debug_srcdir}/bitset \
 	${debug_srcdir}/d

  1   2   >