[PATCH 04/11] make recog () take a rtx_insn *

2016-11-14 Thread tbsaunde+gcc
From: Trevor Saunders 

gcc/ChangeLog:

2016-11-14  Trevor Saunders  

* config/v850/v850.c (expand_prologue): Adjust.
(expand_epilogue): Likewise.
* expr.c (init_expr_target): Likewise.
* genrecog.c (print_subroutine): Always make the argument type
rtx_insn *.
* recog.h: Adjust prototype.
---
 gcc/config/v850/v850.c | 4 ++--
 gcc/expr.c | 4 ++--
 gcc/genrecog.c | 8 +---
 gcc/recog.h| 2 +-
 4 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/gcc/config/v850/v850.c b/gcc/config/v850/v850.c
index 91e182f..c27bb6d 100644
--- a/gcc/config/v850/v850.c
+++ b/gcc/config/v850/v850.c
@@ -1741,7 +1741,7 @@ expand_prologue (void)
 
  v850_all_frame_related (save_all);
 
- code = recog (save_all, NULL_RTX, NULL);
+ code = recog (save_all, NULL, NULL);
  if (code >= 0)
{
  rtx insn = emit_insn (save_all);
@@ -1887,7 +1887,7 @@ expand_epilogue (void)
  offset -= 4;
}
 
- code = recog (restore_all, NULL_RTX, NULL);
+ code = recog (restore_all, NULL, NULL);
  
  if (code >= 0)
{
diff --git a/gcc/expr.c b/gcc/expr.c
index 0b0946d..5d19699 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -109,7 +109,7 @@ static HOST_WIDE_INT int_expr_size (tree);
 void
 init_expr_target (void)
 {
-  rtx insn, pat;
+  rtx pat;
   machine_mode mode;
   int num_clobbers;
   rtx mem, mem1;
@@ -125,7 +125,7 @@ init_expr_target (void)
  useless RTL allocations.  */
   reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
 
-  insn = rtx_alloc (INSN);
+  rtx_insn *insn = as_a<rtx_insn *> (rtx_alloc (INSN));
   pat = gen_rtx_SET (NULL_RTX, NULL_RTX);
   PATTERN (insn) = pat;
 
diff --git a/gcc/genrecog.c b/gcc/genrecog.c
index a8e8c22..aa7f629 100644
--- a/gcc/genrecog.c
+++ b/gcc/genrecog.c
@@ -5102,8 +5102,7 @@ print_subroutine (output_state *os, state *s, int proc_id)
   /* For now, the top-level "recog" takes a plain "rtx", and performs a
  checked cast to "rtx_insn *" for use throughout the rest of the
  function and the code it calls.  */
-  const char *insn_param
-= proc_id > 0 ? "rtx_insn *insn" : "rtx uncast_insn";
+  const char *insn_param = "rtx_insn *insn";
   printf ("\n");
   switch (os->type)
 {
@@ -5142,11 +5141,6 @@ print_subroutine (output_state *os, state *s, int 
proc_id)
   if (proc_id == 0)
 {
   printf ("  recog_data.insn = NULL;\n");
-  if (os->type == RECOG)
-   {
- printf ("  rtx_insn *insn ATTRIBUTE_UNUSED;\n");
- printf ("  insn = safe_as_a <rtx_insn *> (uncast_insn);\n");
-   }
 }
   print_state (os, s, 2, true);
   printf ("}\n");
diff --git a/gcc/recog.h b/gcc/recog.h
index 3a59af8..9f6c42c 100644
--- a/gcc/recog.h
+++ b/gcc/recog.h
@@ -124,7 +124,7 @@ extern int offsettable_address_addr_space_p (int, 
machine_mode, rtx,
  ADDR_SPACE_GENERIC)
 extern bool mode_dependent_address_p (rtx, addr_space_t);
 
-extern int recog (rtx, rtx, int *);
+extern int recog (rtx, rtx_insn *, int *);
 #ifndef GENERATOR_FILE
 static inline int recog_memoized (rtx_insn *insn);
 #endif
-- 
2.9.3.dirty



[PATCH 00/11] more rtx_insn * stuff

2016-11-14 Thread tbsaunde+gcc
From: Trevor Saunders 

Hi,

Basically $subject, which gets rid of a few more casts overall.

I meant to get this out a little while back, but life got busy, and I didn't
read the status announcement properly, so I'm virtually working from Hawaii for
now.  Patches individually built and regtested on x86_64-linux-gnu, and series
run through config-list.mk, ok?

Thanks!

Trev

Trevor Saunders (11):
  use rtx_insn * more places where it is obvious
  split up variables to use rtx_insn * more
  make find_reg_equal_equiv_note take rtx_insn *
  make recog () take a rtx_insn *
  make replace_label_in_insn take labels as rtx_insn *
  make delete_insn () take a rtx_insn *
  remove cast from emit_libcall_block
  make prologue_epilogue_contains take a rtx_insn *
  make add_int_reg_note take rtx_insn *
  make dead_or_set_{,regno_}p take rtx_insn *
  make find_reg{,no}_fusage take rtx_insn *

 gcc/cfgrtl.c  |  5 ++--
 gcc/cfgrtl.h  |  2 +-
 gcc/config/aarch64/aarch64.c  |  4 +--
 gcc/config/alpha/alpha.c  |  8 +++---
 gcc/config/arc/arc.c  |  4 +--
 gcc/config/arm/arm-protos.h   |  2 +-
 gcc/config/arm/arm.c  | 24 +++--
 gcc/config/c6x/c6x.c  |  5 ++--
 gcc/config/darwin.c   |  3 +--
 gcc/config/frv/frv.c  |  4 +--
 gcc/config/frv/frv.md | 20 ++
 gcc/config/i386/i386-protos.h |  6 ++---
 gcc/config/i386/i386.c| 14 +-
 gcc/config/ia64/ia64.c|  2 +-
 gcc/config/mcore/mcore.c  |  2 +-
 gcc/config/mn10300/mn10300.c  |  9 ---
 gcc/config/rs6000/rs6000.c| 18 ++---
 gcc/config/s390/s390.c|  2 +-
 gcc/config/s390/s390.md   | 21 ++-
 gcc/config/spu/spu.c  |  7 +++--
 gcc/config/spu/spu.md |  6 ++---
 gcc/config/tilegx/tilegx.c|  3 ++-
 gcc/config/v850/v850.c|  4 +--
 gcc/cse.c | 63 ++-
 gcc/expr.c|  4 +--
 gcc/function.c| 12 -
 gcc/function.h|  6 ++---
 gcc/genrecog.c|  8 +-
 gcc/lower-subreg.c|  2 +-
 gcc/optabs.c  |  5 ++--
 gcc/optabs.h  |  2 +-
 gcc/recog.h   |  2 +-
 gcc/rtl.h | 22 ++-
 gcc/rtlanal.c | 35 +---
 34 files changed, 169 insertions(+), 167 deletions(-)

-- 
2.9.3.dirty



[PATCH 03/11] make find_reg_equal_equiv_note take rtx_insn *

2016-11-14 Thread tbsaunde+gcc
From: Trevor Saunders 

gcc/ChangeLog:

2016-11-14  Trevor Saunders  

* cse.c (count_reg_usage): Adjust.
* rtl.h: Adjust prototypes.
* rtlanal.c (find_reg_equal_equiv_note): Change argument type to
rtx_insn *.
---
 gcc/cse.c | 63 +++
 gcc/rtl.h |  2 +-
 gcc/rtlanal.c |  2 +-
 3 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/gcc/cse.c b/gcc/cse.c
index 11b8fbe..a2d8b4f 100644
--- a/gcc/cse.c
+++ b/gcc/cse.c
@@ -6824,37 +6824,40 @@ count_reg_usage (rtx x, int *counts, rtx dest, int incr)
 case CALL_INSN:
 case INSN:
 case JUMP_INSN:
-  /* We expect dest to be NULL_RTX here.  If the insn may throw,
-or if it cannot be deleted due to side-effects, mark this fact
-by setting DEST to pc_rtx.  */
-  if ((!cfun->can_delete_dead_exceptions && !insn_nothrow_p (x))
- || side_effects_p (PATTERN (x)))
-   dest = pc_rtx;
-  if (code == CALL_INSN)
-   count_reg_usage (CALL_INSN_FUNCTION_USAGE (x), counts, dest, incr);
-  count_reg_usage (PATTERN (x), counts, dest, incr);
-
-  /* Things used in a REG_EQUAL note aren't dead since loop may try to
-use them.  */
-
-  note = find_reg_equal_equiv_note (x);
-  if (note)
-   {
- rtx eqv = XEXP (note, 0);
+  {
+   rtx_insn *insn = as_a<rtx_insn *> (x);
+   /* We expect dest to be NULL_RTX here.  If the insn may throw,
+  or if it cannot be deleted due to side-effects, mark this fact
+  by setting DEST to pc_rtx.  */
+   if ((!cfun->can_delete_dead_exceptions && !insn_nothrow_p (x))
+   || side_effects_p (PATTERN (x)))
+ dest = pc_rtx;
+   if (code == CALL_INSN)
+ count_reg_usage (CALL_INSN_FUNCTION_USAGE (x), counts, dest, incr);
+   count_reg_usage (PATTERN (x), counts, dest, incr);
+
+   /* Things used in a REG_EQUAL note aren't dead since loop may try to
+  use them.  */
+
+   note = find_reg_equal_equiv_note (insn);
+   if (note)
+ {
+   rtx eqv = XEXP (note, 0);
 
- if (GET_CODE (eqv) == EXPR_LIST)
- /* This REG_EQUAL note describes the result of a function call.
-Process all the arguments.  */
-   do
- {
-   count_reg_usage (XEXP (eqv, 0), counts, dest, incr);
-   eqv = XEXP (eqv, 1);
- }
-   while (eqv && GET_CODE (eqv) == EXPR_LIST);
- else
-   count_reg_usage (eqv, counts, dest, incr);
-   }
-  return;
+   if (GET_CODE (eqv) == EXPR_LIST)
+ /* This REG_EQUAL note describes the result of a function call.
+Process all the arguments.  */
+ do
+   {
+ count_reg_usage (XEXP (eqv, 0), counts, dest, incr);
+ eqv = XEXP (eqv, 1);
+   }
+ while (eqv && GET_CODE (eqv) == EXPR_LIST);
+   else
+ count_reg_usage (eqv, counts, dest, incr);
+ }
+   return;
+  }
 
 case EXPR_LIST:
   if (REG_NOTE_KIND (x) == REG_EQUAL
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 7a44e3b..dc308f2 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -3011,7 +3011,7 @@ extern int dead_or_set_p (const_rtx, const_rtx);
 extern int dead_or_set_regno_p (const_rtx, unsigned int);
 extern rtx find_reg_note (const_rtx, enum reg_note, const_rtx);
 extern rtx find_regno_note (const_rtx, enum reg_note, unsigned int);
-extern rtx find_reg_equal_equiv_note (const_rtx);
+extern rtx find_reg_equal_equiv_note (const rtx_insn *);
 extern rtx find_constant_src (const rtx_insn *);
 extern int find_reg_fusage (const_rtx, enum rtx_code, const_rtx);
 extern int find_regno_fusage (const_rtx, enum rtx_code, unsigned int);
diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index 4617e8e..7a89c03 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -2113,7 +2113,7 @@ find_regno_note (const_rtx insn, enum reg_note kind, 
unsigned int regno)
has such a note.  */
 
 rtx
-find_reg_equal_equiv_note (const_rtx insn)
+find_reg_equal_equiv_note (const rtx_insn *insn)
 {
   rtx link;
 
-- 
2.9.3.dirty



[PATCH 02/11] split up variables to use rtx_insn * more

2016-11-14 Thread tbsaunde+gcc
From: Trevor Saunders 

gcc/ChangeLog:

2016-11-14  Trevor Saunders  

* config/aarch64/aarch64.c (aarch64_emit_unlikely_jump): Split
up variables to make some rtx_insn *.
* config/alpha/alpha.c (emit_unlikely_jump): Likewise.
* config/arc/arc.c: Likewise.
* config/arm/arm.c: Likewise.
* config/mn10300/mn10300.c (mn10300_legitimize_pic_address):
Likewise.
* config/rs6000/rs6000.c (rs6000_expand_split_stack_prologue):
Likewise.
* config/spu/spu.c (spu_emit_branch_hint): Likewise.
---
 gcc/config/aarch64/aarch64.c |  4 ++--
 gcc/config/alpha/alpha.c |  8 +++-
 gcc/config/arc/arc.c |  4 ++--
 gcc/config/arm/arm.c |  4 ++--
 gcc/config/mn10300/mn10300.c |  9 +
 gcc/config/rs6000/rs6000.c   | 14 ++
 gcc/config/spu/spu.c |  7 +++
 7 files changed, 23 insertions(+), 27 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b7d4640..b6676f1 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -11474,8 +11474,8 @@ aarch64_emit_unlikely_jump (rtx insn)
 {
   int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
 
-  insn = emit_jump_insn (insn);
-  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
+  rtx_insn *jump = emit_jump_insn (insn);
+  add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
 }
 
 /* Expand a compare and swap pattern.  */
diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c
index 6d390ae..6c63a8f 100644
--- a/gcc/config/alpha/alpha.c
+++ b/gcc/config/alpha/alpha.c
@@ -4320,11 +4320,9 @@ static void
 emit_unlikely_jump (rtx cond, rtx label)
 {
   int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
-  rtx x;
-
-  x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
-  x = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
-  add_int_reg_note (x, REG_BR_PROB, very_unlikely);
+  rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
+  rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
+  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
 }
 
 /* A subroutine of the atomic operation splitters.  Emit a load-locked
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 5ba7ccc..dbaad24 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -9523,8 +9523,8 @@ emit_unlikely_jump (rtx insn)
 {
   int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
 
-  insn = emit_jump_insn (insn);
-  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
+  rtx_insn *jump = emit_jump_insn (insn);
+  add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
 }
 
 /* Expand code to perform a 8 or 16-bit compare and swap by doing
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index c2bc833..3e63330 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -26931,8 +26931,8 @@ emit_unlikely_jump (rtx insn)
 {
   int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
 
-  insn = emit_jump_insn (insn);
-  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
+  rtx_insn *jump = emit_jump_insn (insn);
+  add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
 }
 
 /* Expand a compare and swap pattern.  */
diff --git a/gcc/config/mn10300/mn10300.c b/gcc/config/mn10300/mn10300.c
index e61bf40..cfc8604 100644
--- a/gcc/config/mn10300/mn10300.c
+++ b/gcc/config/mn10300/mn10300.c
@@ -1860,6 +1860,7 @@ rtx
 mn10300_legitimize_pic_address (rtx orig, rtx reg)
 {
   rtx x;
+  rtx_insn *insn;
 
   if (GET_CODE (orig) == LABEL_REF
   || (GET_CODE (orig) == SYMBOL_REF
@@ -1873,7 +1874,7 @@ mn10300_legitimize_pic_address (rtx orig, rtx reg)
   x = gen_rtx_CONST (SImode, x);
   emit_move_insn (reg, x);
 
-  x = emit_insn (gen_addsi3 (reg, reg, pic_offset_table_rtx));
+  insn = emit_insn (gen_addsi3 (reg, reg, pic_offset_table_rtx));
 }
   else if (GET_CODE (orig) == SYMBOL_REF)
 {
@@ -1885,12 +1886,12 @@ mn10300_legitimize_pic_address (rtx orig, rtx reg)
   x = gen_rtx_PLUS (SImode, pic_offset_table_rtx, x);
   x = gen_const_mem (SImode, x);
 
-  x = emit_move_insn (reg, x);
+  insn = emit_move_insn (reg, x);
 }
   else
 return orig;
 
-  set_unique_reg_note (x, REG_EQUAL, orig);
+  set_unique_reg_note (insn, REG_EQUAL, orig);
   return reg;
 }
 
@@ -3163,7 +3164,7 @@ mn10300_bundle_liw (void)
Insert a SETLB insn just before LABEL.  */
 
 static void
-mn10300_insert_setlb_lcc (rtx_insn *label, rtx branch)
+mn10300_insert_setlb_lcc (rtx_insn *label, rtx_insn *branch)
 {
   rtx lcc, comparison, cmp_reg;
 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 0e05500..297df64 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -24641,11 +24641,9 @@ static void
 emit_unlikely_jump (rtx cond, rtx label)
 {
   int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
-  rtx x;
-
-  x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
-  x = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
-  add_int_reg_note (x, REG_BR_PROB, very_un

[PATCH 01/11] use rtx_insn * more places where it is obvious

2016-11-14 Thread tbsaunde+gcc
From: Trevor Saunders 

gcc/ChangeLog:

2016-11-14  Trevor Saunders  

* config/arm/arm.c (legitimize_pic_address): Change to use
rtx_insn * as the type of variables.
(arm_pic_static_addr): Likewise.
(arm_emit_movpair): Likewise.
* config/c6x/c6x.c (reorg_split_calls): Likewise.
* config/darwin.c (machopic_legitimize_pic_address): Likewise.
* config/frv/frv.c (frv_optimize_membar_local): Likewise.
* config/frv/frv.md: Likewise.
* config/i386/i386-protos.h: Likewise.
* config/i386/i386.c (ix86_expand_split_stack_prologue):
Likewise.
(ix86_split_fp_branch): Likewise.
(predict_jump): Likewise.
* config/ia64/ia64.c: Likewise.
* config/mcore/mcore.c: Likewise.
* config/rs6000/rs6000.c (rs6000_legitimize_tls_address):
Likewise.
* config/s390/s390.c: Likewise.
* config/s390/s390.md: Likewise.
* config/spu/spu.md: Likewise.
* config/tilegx/tilegx.c (tilegx_legitimize_tls_address):
Likewise.
* lower-subreg.c (resolve_simple_move): Likewise.
---
 gcc/config/arm/arm.c  | 18 +++---
 gcc/config/c6x/c6x.c  |  5 +++--
 gcc/config/darwin.c   |  3 +--
 gcc/config/frv/frv.c  |  4 ++--
 gcc/config/frv/frv.md | 20 
 gcc/config/i386/i386-protos.h |  6 +++---
 gcc/config/i386/i386.c| 14 +++---
 gcc/config/ia64/ia64.c|  2 +-
 gcc/config/mcore/mcore.c  |  2 +-
 gcc/config/rs6000/rs6000.c|  4 ++--
 gcc/config/s390/s390.c|  2 +-
 gcc/config/s390/s390.md   | 21 ++---
 gcc/config/spu/spu.md |  6 +++---
 gcc/config/tilegx/tilegx.c|  3 ++-
 gcc/lower-subreg.c|  2 +-
 15 files changed, 56 insertions(+), 56 deletions(-)

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 239117f..c2bc833 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -252,7 +252,7 @@ static bool arm_can_eliminate (const int, const int);
 static void arm_asm_trampoline_template (FILE *);
 static void arm_trampoline_init (rtx, tree, rtx);
 static rtx arm_trampoline_adjust_address (rtx);
-static rtx arm_pic_static_addr (rtx orig, rtx reg);
+static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
@@ -6903,8 +6903,6 @@ legitimize_pic_address (rtx orig, machine_mode mode, rtx 
reg)
   if (GET_CODE (orig) == SYMBOL_REF
   || GET_CODE (orig) == LABEL_REF)
 {
-  rtx insn;
-
   if (reg == 0)
{
  gcc_assert (can_create_pseudo_p ());
@@ -6917,6 +6915,7 @@ legitimize_pic_address (rtx orig, machine_mode mode, rtx 
reg)
 same segment as the GOT.  Unfortunately, the flexibility of linker
 scripts means that we can't be sure of that in general, so assume
 that GOTOFF is never valid on VxWorks.  */
+  rtx_insn *insn;
   if ((GET_CODE (orig) == LABEL_REF
   || (GET_CODE (orig) == SYMBOL_REF &&
   SYMBOL_REF_LOCAL_P (orig)))
@@ -7155,10 +7154,10 @@ arm_load_pic_register (unsigned long saved_regs 
ATTRIBUTE_UNUSED)
 }
 
 /* Generate code to load the address of a static var when flag_pic is set.  */
-static rtx
+static rtx_insn *
 arm_pic_static_addr (rtx orig, rtx reg)
 {
-  rtx l1, labelno, offset_rtx, insn;
+  rtx l1, labelno, offset_rtx;
 
   gcc_assert (flag_pic);
 
@@ -7175,8 +7174,7 @@ arm_pic_static_addr (rtx orig, rtx reg)
UNSPEC_SYMBOL_OFFSET);
   offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
 
-  insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
-  return insn;
+  return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
 }
 
 /* Return nonzero if X is valid as an ARM state addressing register.  */
@@ -16928,8 +16926,6 @@ output_mov_long_double_arm_from_arm (rtx *operands)
 void
 arm_emit_movpair (rtx dest, rtx src)
  {
-  rtx insn;
-
   /* If the src is an immediate, simplify it.  */
   if (CONST_INT_P (src))
 {
@@ -16940,14 +16936,14 @@ arm_emit_movpair (rtx dest, rtx src)
  emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
   GEN_INT (16)),
                GEN_INT ((val >> 16) & 0x0000ffff));
- insn = get_last_insn ();
+ rtx_insn *insn = get_last_insn ();
  set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
}
   return;
 }
emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
-   insn = get_last_insn ();
+   rtx_insn *insn = get_last_insn ();
set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
  }
 
diff --git a/gcc/config/c6x/c6x.c b/gcc/config/c6x/c6x.c
index 6cb9185

[PATCH 10/11] make dead_or_set_{,regno_}p take rtx_insn *

2016-11-14 Thread tbsaunde+gcc
From: Trevor Saunders 

gcc/ChangeLog:

2016-11-14  Trevor Saunders  

* rtl.h: Adjust prototype.
* rtlanal.c (dead_or_set_p): Change argument type to rtx_insn *.
(dead_or_set_regno_p): Likewise.
---
 gcc/rtl.h | 4 ++--
 gcc/rtlanal.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/rtl.h b/gcc/rtl.h
index efb8127..03c1157 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -3007,8 +3007,8 @@ extern void find_all_hard_regs (const_rtx, HARD_REG_SET 
*);
 extern void find_all_hard_reg_sets (const rtx_insn *, HARD_REG_SET *, bool);
 extern void note_stores (const_rtx, void (*) (rtx, const_rtx, void *), void *);
 extern void note_uses (rtx *, void (*) (rtx *, void *), void *);
-extern int dead_or_set_p (const_rtx, const_rtx);
-extern int dead_or_set_regno_p (const_rtx, unsigned int);
+extern int dead_or_set_p (const rtx_insn *, const_rtx);
+extern int dead_or_set_regno_p (const rtx_insn *, unsigned int);
 extern rtx find_reg_note (const_rtx, enum reg_note, const_rtx);
 extern rtx find_regno_note (const_rtx, enum reg_note, unsigned int);
 extern rtx find_reg_equal_equiv_note (const rtx_insn *);
diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index 75dde3d..9cd24bb 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -1943,7 +1943,7 @@ note_uses (rtx *pbody, void (*fun) (rtx *, void *), void 
*data)
by INSN.  */
 
 int
-dead_or_set_p (const_rtx insn, const_rtx x)
+dead_or_set_p (const rtx_insn *insn, const_rtx x)
 {
   unsigned int regno, end_regno;
   unsigned int i;
@@ -2017,7 +2017,7 @@ covers_regno_p (const_rtx dest, unsigned int test_regno)
 /* Utility function for dead_or_set_p to check an individual register. */
 
 int
-dead_or_set_regno_p (const_rtx insn, unsigned int test_regno)
+dead_or_set_regno_p (const rtx_insn *insn, unsigned int test_regno)
 {
   const_rtx pattern;
 
-- 
2.9.3.dirty



[PATCH 06/11] make delete_insn () take a rtx_insn *

2016-11-14 Thread tbsaunde+gcc
From: Trevor Saunders 

gcc/ChangeLog:

2016-11-14  Trevor Saunders  

* cfgrtl.c (delete_insn): Change argument type to rtx_insn *.
(fixup_reorder_chain): Adjust.
* cfgrtl.h: Adjust prototype.
---
 gcc/cfgrtl.c | 5 ++---
 gcc/cfgrtl.h | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c
index d2719db..d0aac09 100644
--- a/gcc/cfgrtl.c
+++ b/gcc/cfgrtl.c
@@ -123,9 +123,8 @@ can_delete_label_p (const rtx_code_label *label)
 /* Delete INSN by patching it out.  */
 
 void
-delete_insn (rtx uncast_insn)
+delete_insn (rtx_insn *insn)
 {
-  rtx_insn *insn = as_a <rtx_insn *> (uncast_insn);
   rtx note;
   bool really_delete = true;
 
@@ -3817,7 +3816,7 @@ fixup_reorder_chain (void)
  update_br_prob_note (bb);
  if (LABEL_NUSES (ret_label) == 0
  && single_pred_p (e_taken->dest))
-   delete_insn (ret_label);
+   delete_insn (as_a<rtx_insn *> (ret_label));
  continue;
}
}
diff --git a/gcc/cfgrtl.h b/gcc/cfgrtl.h
index f4c1396..8e2c13c 100644
--- a/gcc/cfgrtl.h
+++ b/gcc/cfgrtl.h
@@ -20,7 +20,7 @@ along with GCC; see the file COPYING3.  If not see
 #ifndef GCC_CFGRTL_H
 #define GCC_CFGRTL_H
 
-extern void delete_insn (rtx);
+extern void delete_insn (rtx_insn *);
 extern bool delete_insn_and_edges (rtx_insn *);
 extern void delete_insn_chain (rtx, rtx_insn *, bool);
 extern basic_block create_basic_block_structure (rtx_insn *, rtx_insn *,
-- 
2.9.3.dirty



[PATCH 05/11] make replace_label_in_insn take labels as rtx_insn *

2016-11-14 Thread tbsaunde+gcc
From: Trevor Saunders 

gcc/ChangeLog:

2016-11-14  Trevor Saunders  

* rtl.h: Adjust prototype.
* rtlanal.c (replace_label_in_insn): Change argument type to
rtx_insn *.
---
 gcc/rtl.h | 2 +-
 gcc/rtlanal.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/rtl.h b/gcc/rtl.h
index dc308f2..c6c30b5 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -3039,7 +3039,7 @@ extern void copy_reg_eh_region_note_backward (rtx, 
rtx_insn *, rtx);
 extern int inequality_comparisons_p (const_rtx);
 extern rtx replace_rtx (rtx, rtx, rtx, bool = false);
 extern void replace_label (rtx *, rtx, rtx, bool);
-extern void replace_label_in_insn (rtx_insn *, rtx, rtx, bool);
+extern void replace_label_in_insn (rtx_insn *, rtx_insn *, rtx_insn *, bool);
 extern bool rtx_referenced_p (const_rtx, const_rtx);
 extern bool tablejump_p (const rtx_insn *, rtx_insn **, rtx_jump_table_data 
**);
 extern int computed_jump_p (const rtx_insn *);
diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index 7a89c03..504b265 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -3079,8 +3079,8 @@ replace_label (rtx *loc, rtx old_label, rtx new_label, 
bool update_label_nuses)
 }
 
 void
-replace_label_in_insn (rtx_insn *insn, rtx old_label, rtx new_label,
-  bool update_label_nuses)
+replace_label_in_insn (rtx_insn *insn, rtx_insn *old_label,
+  rtx_insn *new_label, bool update_label_nuses)
 {
   rtx insn_as_rtx = insn;
   replace_label (&insn_as_rtx, old_label, new_label, update_label_nuses);
-- 
2.9.3.dirty



[PATCH 11/11] make find_reg{,no}_fusage take rtx_insn *

2016-11-14 Thread tbsaunde+gcc
From: Trevor Saunders 

gcc/ChangeLog:

2016-11-14  Trevor Saunders  

* config/arm/arm-protos.h: Adjust prototype.
* config/arm/arm.c (use_return_insn): Change argument type to
rtx_insn *.
* rtl.h (is_a_helper <const rtx_call_insn *>::test): New specialization.
* rtlanal.c (reg_set_p): Adjust.
(find_reg_fusage): Change argument type to rtx_insn *.
(find_regno_fusage): Likewise.
---
 gcc/config/arm/arm-protos.h |  2 +-
 gcc/config/arm/arm.c|  2 +-
 gcc/rtl.h   | 12 ++--
 gcc/rtlanal.c   | 23 +--
 4 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 95bae5e..539588b 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -23,7 +23,7 @@
 #define GCC_ARM_PROTOS_H
 
 extern enum unwind_info_type arm_except_unwind_info (struct gcc_options *);
-extern int use_return_insn (int, rtx);
+extern int use_return_insn (int, rtx_insn *);
 extern bool use_simple_return_p (void);
 extern enum reg_class arm_regno_class (int);
 extern void arm_load_pic_register (unsigned long);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 3e63330..7f8ab8e 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -3737,7 +3737,7 @@ arm_trampoline_adjust_address (rtx addr)
call.  SIBLING is the call insn, so we can examine its register usage.  */
 
 int
-use_return_insn (int iscond, rtx sibling)
+use_return_insn (int iscond, rtx_insn *sibling)
 {
   int regno;
   unsigned int func_type;
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 03c1157..f8b6b95 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -907,6 +907,14 @@ is_a_helper ::test (rtx rt)
 template <>
 template <>
 inline bool
+is_a_helper <const rtx_call_insn *>::test (const_rtx rt)
+{
+  return CALL_P (rt);
+}
+
+template <>
+template <>
+inline bool
 is_a_helper <rtx_call_insn *>::test (rtx_insn *insn)
 {
   return CALL_P (insn);
@@ -3013,8 +3021,8 @@ extern rtx find_reg_note (const_rtx, enum reg_note, 
const_rtx);
 extern rtx find_regno_note (const_rtx, enum reg_note, unsigned int);
 extern rtx find_reg_equal_equiv_note (const rtx_insn *);
 extern rtx find_constant_src (const rtx_insn *);
-extern int find_reg_fusage (const_rtx, enum rtx_code, const_rtx);
-extern int find_regno_fusage (const_rtx, enum rtx_code, unsigned int);
+extern int find_reg_fusage (const rtx_insn *, enum rtx_code, const_rtx);
+extern int find_regno_fusage (const rtx_insn *, enum rtx_code, unsigned int);
 extern rtx alloc_reg_note (enum reg_note, rtx, rtx);
 extern void add_reg_note (rtx, enum reg_note, rtx);
 extern void add_int_reg_note (rtx_insn *, enum reg_note, int);
diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index 9cd24bb..e85da56 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -1196,14 +1196,17 @@ reg_set_p (const_rtx reg, const_rtx insn)
   /* We can be passed an insn or part of one.  If we are passed an insn,
  check if a side-effect of the insn clobbers REG.  */
   if (INSN_P (insn)
-  && (FIND_REG_INC_NOTE (insn, reg)
- || (CALL_P (insn)
- && ((REG_P (reg)
-  && REGNO (reg) < FIRST_PSEUDO_REGISTER
-  && overlaps_hard_reg_set_p (regs_invalidated_by_call,
-  GET_MODE (reg), REGNO (reg)))
- || MEM_P (reg)
- || find_reg_fusage (insn, CLOBBER, reg)
+  && FIND_REG_INC_NOTE (insn, reg))
+return true;
+
+  const rtx_call_insn *call = dyn_cast<const rtx_call_insn *> (insn);
+  if (call
+  && ((REG_P (reg)
+  && REGNO (reg) < FIRST_PSEUDO_REGISTER
+  && overlaps_hard_reg_set_p (regs_invalidated_by_call,
+  GET_MODE (reg), REGNO (reg)))
+ || MEM_P (reg)
+ || find_reg_fusage (call, CLOBBER, reg)))
 return true;
 
   return set_of (reg, insn) != NULL_RTX;
@@ -2165,7 +2168,7 @@ find_constant_src (const rtx_insn *insn)
in the CALL_INSN_FUNCTION_USAGE information of INSN.  */
 
 int
-find_reg_fusage (const_rtx insn, enum rtx_code code, const_rtx datum)
+find_reg_fusage (const rtx_insn *insn, enum rtx_code code, const_rtx datum)
 {
   /* If it's not a CALL_INSN, it can't possibly have a
  CALL_INSN_FUNCTION_USAGE field, so don't bother checking.  */
@@ -2210,7 +2213,7 @@ find_reg_fusage (const_rtx insn, enum rtx_code code, 
const_rtx datum)
in the CALL_INSN_FUNCTION_USAGE information of INSN.  */
 
 int
-find_regno_fusage (const_rtx insn, enum rtx_code code, unsigned int regno)
+find_regno_fusage (const rtx_insn *insn, enum rtx_code code, unsigned int 
regno)
 {
   rtx link;
 
-- 
2.9.3.dirty



[PATCH 08/11] make prologue_epilogue_contains take a rtx_insn *

2016-11-14 Thread tbsaunde+gcc
From: Trevor Saunders 

gcc/ChangeLog:

2016-11-14  Trevor Saunders  

* function.c (contains): Change argument type to rtx_insn *.
(prologue_contains): Likewise.
(epilogue_contains): Likewise.
(prologue_epilogue_contains): Likewise.
* function.h: Adjust prototype.
---
 gcc/function.c | 12 ++--
 gcc/function.h |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/gcc/function.c b/gcc/function.c
index 0b1d168..ab76a26 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -142,7 +142,7 @@ extern tree debug_find_var_in_block_tree (tree, tree);
can always export `prologue_epilogue_contains'.  */
 static void record_insns (rtx_insn *, rtx, hash_table **)
  ATTRIBUTE_UNUSED;
-static bool contains (const_rtx, hash_table *);
+static bool contains (const rtx_insn *, hash_table *);
 static void prepare_function_start (void);
 static void do_clobber_return_reg (rtx, void *);
 static void do_use_return_reg (rtx, void *);
@@ -5733,7 +5733,7 @@ maybe_copy_prologue_epilogue_insn (rtx insn, rtx copy)
we can be running after reorg, SEQUENCE rtl is possible.  */
 
 static bool
-contains (const_rtx insn, hash_table *hash)
+contains (const rtx_insn *insn, hash_table *hash)
 {
   if (hash == NULL)
 return false;
@@ -5748,23 +5748,23 @@ contains (const_rtx insn, hash_table 
*hash)
   return false;
 }
 
-  return hash->find (const_cast<rtx> (insn)) != NULL;
+  return hash->find (const_cast<rtx_insn *> (insn)) != NULL;
 }
 
 int
-prologue_contains (const_rtx insn)
+prologue_contains (const rtx_insn *insn)
 {
   return contains (insn, prologue_insn_hash);
 }
 
 int
-epilogue_contains (const_rtx insn)
+epilogue_contains (const rtx_insn *insn)
 {
   return contains (insn, epilogue_insn_hash);
 }
 
 int
-prologue_epilogue_contains (const_rtx insn)
+prologue_epilogue_contains (const rtx_insn *insn)
 {
   if (contains (insn, prologue_insn_hash))
 return 1;
diff --git a/gcc/function.h b/gcc/function.h
index e854c7f..163ad0c 100644
--- a/gcc/function.h
+++ b/gcc/function.h
@@ -628,9 +628,9 @@ extern void clobber_return_register (void);
 extern void expand_function_end (void);
 extern rtx get_arg_pointer_save_area (void);
 extern void maybe_copy_prologue_epilogue_insn (rtx, rtx);
-extern int prologue_contains (const_rtx);
-extern int epilogue_contains (const_rtx);
-extern int prologue_epilogue_contains (const_rtx);
+extern int prologue_contains (const rtx_insn *);
+extern int epilogue_contains (const rtx_insn *);
+extern int prologue_epilogue_contains (const rtx_insn *);
 extern void record_prologue_seq (rtx_insn *);
 extern void record_epilogue_seq (rtx_insn *);
 extern void emit_return_into_block (bool simple_p, basic_block bb);
-- 
2.9.3.dirty



[PATCH 07/11] remove cast from emit_libcall_block

2016-11-14 Thread tbsaunde+gcc
From: Trevor Saunders 

gcc/ChangeLog:

2016-11-14  Trevor Saunders  

* optabs.c (emit_libcall_block): Change argument type to
rtx_insn *.
* optabs.h: Adjust prototype.
---
 gcc/optabs.c | 5 ++---
 gcc/optabs.h | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/gcc/optabs.c b/gcc/optabs.c
index 7a1f025..2c7ca25 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -3681,10 +3681,9 @@ emit_libcall_block_1 (rtx_insn *insns, rtx target, rtx 
result, rtx equiv,
 }
 
 void
-emit_libcall_block (rtx insns, rtx target, rtx result, rtx equiv)
+emit_libcall_block (rtx_insn *insns, rtx target, rtx result, rtx equiv)
 {
-  emit_libcall_block_1 (safe_as_a <rtx_insn *> (insns),
-   target, result, equiv, false);
+  emit_libcall_block_1 (insns, target, result, equiv, false);
 }
 
 /* Nonzero if we can perform a comparison of mode MODE straightforwardly.
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 03fd94d..9ab8cb1 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -224,7 +224,7 @@ extern bool maybe_emit_unop_insn (enum insn_code, rtx, rtx, 
enum rtx_code);
 extern void emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code);
 
 /* Emit code to make a call to a constant function or a library call.  */
-extern void emit_libcall_block (rtx, rtx, rtx, rtx);
+extern void emit_libcall_block (rtx_insn *, rtx, rtx, rtx);
 
 /* The various uses that a comparison can have; used by can_compare_p:
jumps, conditional moves, store flag operations.  */
-- 
2.9.3.dirty



[PATCH 09/11] make add_int_reg_note take rtx_insn *

2016-11-14 Thread tbsaunde+gcc
From: Trevor Saunders 

gcc/ChangeLog:

2016-11-14  Trevor Saunders  

* rtl.h: Adjust prototype.
* rtlanal.c (add_int_reg_note): Change argument type to rtx_insn *.
---
 gcc/rtl.h | 2 +-
 gcc/rtlanal.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/rtl.h b/gcc/rtl.h
index c6c30b5..efb8127 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -3017,7 +3017,7 @@ extern int find_reg_fusage (const_rtx, enum rtx_code, 
const_rtx);
 extern int find_regno_fusage (const_rtx, enum rtx_code, unsigned int);
 extern rtx alloc_reg_note (enum reg_note, rtx, rtx);
 extern void add_reg_note (rtx, enum reg_note, rtx);
-extern void add_int_reg_note (rtx, enum reg_note, int);
+extern void add_int_reg_note (rtx_insn *, enum reg_note, int);
 extern void add_shallow_copy_of_reg_note (rtx_insn *, rtx);
 extern rtx duplicate_reg_note (rtx);
 extern void remove_note (rtx_insn *, const_rtx);
diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index 504b265..75dde3d 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -2286,7 +2286,7 @@ add_reg_note (rtx insn, enum reg_note kind, rtx datum)
 /* Add an integer register note with kind KIND and datum DATUM to INSN.  */
 
 void
-add_int_reg_note (rtx insn, enum reg_note kind, int datum)
+add_int_reg_note (rtx_insn *insn, enum reg_note kind, int datum)
 {
   gcc_checking_assert (int_reg_note_p (kind));
   REG_NOTES (insn) = gen_rtx_INT_LIST ((machine_mode) kind,
-- 
2.9.3.dirty



[PATCH v2] Support ASan ODR indicators at compiler side.

2016-11-14 Thread Maxim Ostapenko

Hi,

this is the second attempt to support ASan ODR indicators in GCC. I've
fixed issues with several flags (e.g. TREE_ADDRESSABLE) and introduced a
new "asan odr indicator" attribute to distinguish indicators from other
symbols.

Looks better now?

Tested and ASan bootstrapped on x86_64-unknown-linux-gnu.

-Maxim
config/

	* bootstrap-asan.mk: Replace LSAN_OPTIONS=detect_leaks=0 with
	ASAN_OPTIONS=detect_leaks=0:use_odr_indicator=1.

gcc/

	* asan.c (asan_global_struct): Refactor.
	(create_odr_indicator): New function.
	(asan_needs_odr_indicator_p): Likewise.
	(is_odr_indicator): Likewise.
	(asan_add_global): Introduce odr_indicator_ptr. Pass it into global's
	constructor.
	(asan_protect_global): Do not protect odr indicators.

gcc/c-family/

	* c-attribs.c (asan odr indicator): New attribute.
	(handle_asan_odr_indicator_attribute): New function.

gcc/testsuite/

	* c-c++-common/asan/no-redundant-odr-indicators-1.c: New test.

diff --git a/config/ChangeLog b/config/ChangeLog
index 3b0092b..0c75185 100644
--- a/config/ChangeLog
+++ b/config/ChangeLog
@@ -1,3 +1,8 @@
+2016-11-14  Maxim Ostapenko  
+
+	* bootstrap-asan.mk: Replace LSAN_OPTIONS=detect_leaks=0 with
+	ASAN_OPTIONS=detect_leaks=0:use_odr_indicator=1.
+
 2016-06-21  Trevor Saunders  
 
 	* elf.m4: Remove interix support.
diff --git a/config/bootstrap-asan.mk b/config/bootstrap-asan.mk
index 70baaf9..e73d4c2 100644
--- a/config/bootstrap-asan.mk
+++ b/config/bootstrap-asan.mk
@@ -1,7 +1,7 @@
 # This option enables -fsanitize=address for stage2 and stage3.
 
 # Suppress LeakSanitizer in bootstrap.
-export LSAN_OPTIONS="detect_leaks=0"
+export ASAN_OPTIONS=detect_leaks=0:use_odr_indicator=1
 
 STAGE2_CFLAGS += -fsanitize=address
 STAGE3_CFLAGS += -fsanitize=address
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a76e3e8..64744b9 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2016-11-14  Maxim Ostapenko  
+
+	* asan.c (asan_global_struct): Refactor.
+	(create_odr_indicator): New function.
+	(asan_needs_odr_indicator_p): Likewise.
+	(is_odr_indicator): Likewise.
+	(asan_add_global): Introduce odr_indicator_ptr. Pass it into global's
+	constructor.
+	(asan_protect_global): Do not protect odr indicators.
+
 2016-11-09  Kugan Vivekanandarajah  
 
 	* ipa-cp.c (ipa_get_jf_pass_through_result): Handle unary expressions.
diff --git a/gcc/asan.c b/gcc/asan.c
index 6e93ea3..1191ebe 100644
--- a/gcc/asan.c
+++ b/gcc/asan.c
@@ -1388,6 +1388,16 @@ asan_needs_local_alias (tree decl)
   return DECL_WEAK (decl) || !targetm.binds_local_p (decl);
 }
 
+/* Return true if DECL, a global var, is an artificial ODR indicator symbol
+   therefore doesn't need protection.  */
+
+static bool
+is_odr_indicator (tree decl)
+{
+  return DECL_ARTIFICIAL (decl)
+	 && lookup_attribute ("asan odr indicator", DECL_ATTRIBUTES (decl));
+}
+
 /* Return true if DECL is a VAR_DECL that should be protected
by Address Sanitizer, by appending a red zone with protected
shadow memory after it and aligning it to at least
@@ -1436,7 +1446,8 @@ asan_protect_global (tree decl)
   || ASAN_RED_ZONE_SIZE * BITS_PER_UNIT > MAX_OFILE_ALIGNMENT
   || !valid_constant_size_p (DECL_SIZE_UNIT (decl))
   || DECL_ALIGN_UNIT (decl) > 2 * ASAN_RED_ZONE_SIZE
-  || TREE_TYPE (decl) == ubsan_get_source_location_type ())
+  || TREE_TYPE (decl) == ubsan_get_source_location_type ()
+  || is_odr_indicator (decl))
 return false;
 
   rtl = DECL_RTL (decl);
@@ -2266,14 +2277,15 @@ asan_dynamic_init_call (bool after_p)
 static tree
 asan_global_struct (void)
 {
-  static const char *field_names[8]
+  static const char *field_names[]
 = { "__beg", "__size", "__size_with_redzone",
-	"__name", "__module_name", "__has_dynamic_init", "__location", "__odr_indicator"};
-  tree fields[8], ret;
-  int i;
+	"__name", "__module_name", "__has_dynamic_init", "__location",
+	"__odr_indicator"};
+  tree fields[ARRAY_SIZE (field_names)], ret;
+  unsigned i;
 
   ret = make_node (RECORD_TYPE);
-  for (i = 0; i < 8; i++)
+  for (i = 0; i < ARRAY_SIZE (field_names); i++)
 {
   fields[i]
 	= build_decl (UNKNOWN_LOCATION, FIELD_DECL,
@@ -2295,6 +2307,56 @@ asan_global_struct (void)
   return ret;
 }
 
+/* Create and return odr indicator symbol for DECL.
+   TYPE is __asan_global struct type as returned by asan_global_struct.  */
+
+static tree
+create_odr_indicator (tree decl, tree type)
+{
+  char sym_name[100], tmp_name[100];
+  static int lasan_odr_ind_cnt = 0;
+  tree uptr = TREE_TYPE (DECL_CHAIN (TYPE_FIELDS (type)));
+
+  snprintf (tmp_name, sizeof (tmp_name), "__odr_asan_%s_",
+	IDENTIFIER_POINTER (DECL_NAME (decl)));
+  ASM_GENERATE_INTERNAL_LABEL (sym_name, tmp_name, ++lasan_odr_ind_cnt);
+  char *asterisk = sym_name;
+  while ((asterisk = strchr (asterisk, '*')))
+*asterisk = '_';
+  tree var = build_decl (UNKNOWN_LOCATION, VAR_DECL, get_identifier (sym_name),
+			 char_type_node);
+  TREE_ADDRESSABLE (var) = 1;
+  TREE_READONLY (var) = 0;
+  TREE_THIS_VOL

Re: [PATCH] PR fortran/78300 -- class procedure as actual arg

2016-11-14 Thread Janus Weil
Hi Steve,

> The attached patch allows a procedure with a class result to
> be an actual argument to a subprogram where the dummy argument
> is expected to be a class.  OK to commit?

that patch actually does not look quite right to me. Does it survive a regtest?

I think one should rather check why the class_ok attribute is not set
in the first place, and maybe apply a fix in gfc_build_class_symbol.

Cheers,
Janus


[PATCH] libiberty: Fix some demangler crashes caused by reading past end of input.

2016-11-14 Thread Mark Wielaard
In various situations the cplus_demangle () function could read past the
end of input causing crashes. Add checks in various places to not advance
the demangle string location and fail early when end of string is reached.
Add various examples of input strings to the testsuite that would crash
test-demangle before the fixes.

Found by using the American Fuzzy Lop (afl) fuzzer.

libiberty/ChangeLog:

   * cplus-dem.c (demangle_signature): After 'H', template function,
   no success and don't advance position if end of string reached.
   (demangle_template): After 'z', template name, return zero on
   premature end of string.
   (gnu_special): Guard strchr against searching for zero characters.
   (do_type): If member, only advance mangled string when 'F' found.
   * testsuite/demangle-expected: Add examples of strings that could
   crash the demangler by reading past end of input.
---
 
diff --git a/libiberty/cplus-dem.c b/libiberty/cplus-dem.c
index 8a699ee..0386da5 100644
--- a/libiberty/cplus-dem.c
+++ b/libiberty/cplus-dem.c
@@ -1697,7 +1697,10 @@ demangle_signature (struct work_stuff *work,
   0);
  if (!(work->constructor & 1))
expect_return_type = 1;
- (*mangled)++;
+ if (!**mangled)
+   success = 0;
+ else
+   (*mangled)++;
  break;
}
  /* fall through */
@@ -2176,6 +2179,8 @@ demangle_template (struct work_stuff *work, const char 
**mangled,
{
  int idx;
  (*mangled)++;
+ if (**mangled == '\0')
+   return (0);
  (*mangled)++;
 
  idx = consume_count_with_underscores (mangled);
@@ -3020,7 +3025,7 @@ gnu_special (struct work_stuff *work, const char 
**mangled, string *declp)
   int success = 1;
   const char *p;
 
-  if ((*mangled)[0] == '_'
+  if ((*mangled)[0] == '_' && (*mangled)[1] != '\0'
   && strchr (cplus_markers, (*mangled)[1]) != NULL
   && (*mangled)[2] == '_')
 {
@@ -3034,7 +3039,7 @@ gnu_special (struct work_stuff *work, const char 
**mangled, string *declp)
&& (*mangled)[3] == 't'
&& (*mangled)[4] == '_')
   || ((*mangled)[1] == 'v'
-  && (*mangled)[2] == 't'
+  && (*mangled)[2] == 't' && (*mangled)[3] != '\0'
   && strchr (cplus_markers, (*mangled)[3]) != NULL)))
 {
   /* Found a GNU style virtual table, get past "_vt"
@@ -3804,11 +3809,12 @@ do_type (struct work_stuff *work, const char **mangled, 
string *result)
break;
  }
 
-   if (*(*mangled)++ != 'F')
+   if (*(*mangled) != 'F')
  {
success = 0;
break;
  }
+   (*mangled)++;
  }
if ((member && !demangle_nested_args (work, mangled, &decl))
|| **mangled != '_')
diff --git a/libiberty/testsuite/demangle-expected 
b/libiberty/testsuite/demangle-expected
index 5badc3e..236161c 100644
--- a/libiberty/testsuite/demangle-expected
+++ b/libiberty/testsuite/demangle-expected
@@ -4606,3 +4606,23 @@ void f(void (*)(int) noexcept)
 
 _Z1fIvJiELb0EEvPDwiEFT_DpT0_E
 void f(void (*)(int) throw(int))
+
+# Could crash
+_
+_
+
+# Could crash
+_vt
+_vt
+
+# Could crash
+_$_1Acitz
+_$_1Acitz
+
+# Could crash
+_$_H1R
+_$_H1R
+
+# Could crash
+_Q8ccQ4M2e.
+_Q8ccQ4M2e.
-- 
1.8.3.1



Re: [PATCH, RFC] Improve ivopts group costs

2016-11-14 Thread Bin.Cheng
On Sat, Nov 12, 2016 at 8:36 AM, Evgeny Kudryashov  wrote:
> On 2016-11-10 13:30, Bin.Cheng wrote:
>>
>> Hi,
>> I see the cost problem with your test now.  When computing an address
>> type iv_use with a candidate, the computation consists of two parts,
>> for computation can be represented by addressing mode, it is done in
>> memory reference; for computation cannot be represented by addressing
>> mode, it is done outside of memory reference.  The final cost is added
>> up from the two computation parts.
>> For address iv_use:
>> MEM[base + biv << scale + offset]
>> when it is computed with below candidate on target only supports [base
>> + biv << scale] addressing mode:
>> biv
>> The computations would be like:
>> base' = base + offset
>> MEM[base' + biv << scale]
>> Both computations have their own cost: the first one is a normal RTX cost,
>> the second one is an addressing mode cost.  The final cost is added up from
>> both parts.
>>
>> Normally, all these cost should be added up in cost model, but there
>> should be one exception found in your test: If iv_uses of a group has
>> exactly the same iv ({base, step}), the first part computation (RTX)
>> can be shared among all iv_uses, thus the cost should only counted one
>> time.  That is, we should be able to model such CSE opportunities.
>> Apparently, we can't CSE the second part computation, of course there
>> won't be CSE opportunities in address expression anyway.
>
>
> Hi Bin,
> Yes, that is exactly what happens. And this computation might be cheaper
> than initialization and increment of new iv and it would be more preferable.
>
>> That said, this patch should make difference between cost of RTX
>> computation and address expression, and only add up RTX cost once if
>> it can be CSEed.  Well, it might be not trivial to check CSE
>> opportunities of RTX computation, for example, some iv_uses of the
>> group are the same, others are not.
>>
>> Thanks,
>> bin
>
>
> Since uses in a given group have the same base and step, they can only
> differ by offsets. Among those, equivalent offsets can be CSE'd. Then,
> perhaps it's possible to use a hash set of unique offsets in this group cost
> estimation loop, and count RTX computation cost only when adding a new entry
> to the set. What do you think about this approach?
We can start handling groups with exactly the same uses.  When
constructing groups, record a flag indicating the group only has the
same uses; when computing cost, accumulate RTX computation cost once
for flagged groups.  The rationale is: use/cand cost computation in
IVOPT is complicated and inaccurate, so it doesn't make much sense
to try fine-tuning based on such costs.  It often results in
Brownian motion (I am not sure if that's the word).  Moreover, we
may want to further restrict to single basic block iv_uses when
flagging groups.
BTW, it may be non-trivial to compute the costs of RTX computation and
address expression separately.  Such costs are computed and
accumulated together in get_address_cost.

>
> While working on this issue, I've found another problem: that costs may
> become negative. That looks unintended, I have filed a new bug:
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78332
It could be improved, but not a functional bug IMHO.  I will comment on the PR.

Thanks,
bin


Some backward threader refactoring

2016-11-14 Thread Jeff Law



I was looking at the possibility of dropping threading from VRP1/VRP2 or 
DOM1/DOM2 in favor of the backwards threader -- the obvious idea being 
to recover some compile-time for gcc-7.


Of the old-style threader passes (VRP1, VRP2, DOM1, DOM2), VRP2 is by 
far the least useful.  But I can't see a path to removing it in the 
gcc-7 timeframe.


Looking at what is caught by VRP and DOM threaders is quite interesting. 
 VRP obviously catches stuff with ranges, some fairly complex.  While 
you might think that querying range info in the backwards threader would 
work, the problem is we lose way too much information as we drop 
ASSERT_EXPRs.  (Recall that the threader runs while we're still in VRP 
and thus has access to the ASSERT_EXPRs).


The DOM threaders catch stuff through state, simplifications and 
bi-directional propagation of equivalences created by conditionals.


The most obvious limitation of the backwards walking threader is that it 
only looks at PHIs, copies and constant initializations.  Other 
statements are ignored and stop the backwards walk.


I've got a fair amount of support for walking through unary and limited 
form binary expressions that I believe can be extended based on needs. 
But that's not quite ready for stage1 close.  However, some of the 
refactoring to make those changes easier to implement is ready.


This patch starts to break down fsm_find_control_statement_thread_paths 
into more manageable hunks.


One such hunk is sub-path checking.  Essentially we're looking to add a 
range of blocks to the thread path as we move from one def site to 
another in the IL.  There aren't any functional changes in that 
refactoring.  It's really just to make f_f_c_s_t_p easier to grok.


f_f_c_s_t_p has inline code to recursively walk backwards through PHI 
nodes as well as assignments that are copies and constant initialization 
terminals.  Pulling that handling out results in a f_f_c_s_t_p that fits 
on a page.  It's just a hell of a lot easier to see what's going on.


The handling of assignments is slightly improved in this patch. 
Essentially we only considered a const initialization using an 
INTEGER_CST as a proper terminal node.  But certainly other constants
are useful -- ADDR_EXPR in particular -- and are now handled.  I'll mirror
that improvement in the PHI node routines tomorrow.


Anyway, this is really just meant to make it easier to start extending 
the GIMPLE_ASSIGN handling.


Bootstrapped and regression tested on x86_64-linux-gnu.

I've got function comments for the new routines on a local branch.  I'll 
get those installed before committing.


Jeff
* tree-ssa-threadbackward.c (fsm_find_thread_path): Remove unneeded
parameter.  Callers changed.
(check_subpath_and_update_thread_path): Extracted from
fsm_find_control_statement_thread_paths.
(handle_phi, handle_assignment, handle_assignment_p): Likewise.
(handle_assignment): Allow any constant node, not just INTEGER_CST.


diff --git a/gcc/tree-ssa-threadbackward.c b/gcc/tree-ssa-threadbackward.c
index fd7d855..9ff3d75 100644
--- a/gcc/tree-ssa-threadbackward.c
+++ b/gcc/tree-ssa-threadbackward.c
@@ -62,14 +62,12 @@ get_gimple_control_stmt (basic_block bb)
 /* Return true if the CFG contains at least one path from START_BB to END_BB.
When a path is found, record in PATH the blocks from END_BB to START_BB.
VISITED_BBS is used to make sure we don't fall into an infinite loop.  Bound
-   the recursion to basic blocks belonging to LOOP.
-   SPEED_P indicate that we could increase code size to improve the code path 
*/
+   the recursion to basic blocks belonging to LOOP.  */
 
 static bool
 fsm_find_thread_path (basic_block start_bb, basic_block end_bb,
  vec *&path,
- hash_set *visited_bbs, loop_p loop,
- bool speed_p)
+ hash_set *visited_bbs, loop_p loop)
 {
   if (loop != start_bb->loop_father)
 return false;
@@ -85,8 +83,7 @@ fsm_find_thread_path (basic_block start_bb, basic_block 
end_bb,
   edge e;
   edge_iterator ei;
   FOR_EACH_EDGE (e, ei, start_bb->succs)
-   if (fsm_find_thread_path (e->dest, end_bb, path, visited_bbs, loop,
- speed_p))
+   if (fsm_find_thread_path (e->dest, end_bb, path, visited_bbs, loop))
  {
vec_safe_push (path, start_bb);
return true;
@@ -427,6 +424,196 @@ convert_and_register_jump_thread_path (vec *path,
   --max_threaded_paths;
 }
 
+/* While following a chain of SSA_NAME definitions, we jumped from a definition
+   in LAST_BB to a definition in VAR_BB (walking backwards).
+
+   Verify there is a single path between the blocks and none of the blocks
+   in the path is already in VISITED_BBS.  If so, then update VISITED_BBS,
+   add the new blocks to PATH and return TRUE.  Otherwise return FALSE.
+
+   Store the length of the subpath in NEXT_PATH_LENGTH.  */
+
+static bool

Re: [Patch 5/17] Add -fpermitted-flt-eval-methods=[c11|ts-18661-3]

2016-11-14 Thread James Greenhalgh

On Fri, Nov 11, 2016 at 09:42:32PM -0700, Sandra Loosemore wrote:
> On 11/11/2016 08:37 AM, James Greenhalgh wrote:
>
> >diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> >index f133b3a..75ff8ec 100644
> >--- a/gcc/doc/invoke.texi
> >+++ b/gcc/doc/invoke.texi
> >@@ -378,7 +378,8 @@ Objective-C and Objective-C++ Dialects}.
> > -flto-partition=@var{alg} -fmerge-all-constants @gol
> > -fmerge-constants -fmodulo-sched -fmodulo-sched-allow-regmoves @gol
> > -fmove-loop-invariants -fno-branch-count-reg @gol
> >--fno-defer-pop -fno-fp-int-builtin-inexact -fno-function-cse @gol
> >+-fno-defer-pop -fno-fp-int-builtin-inexact @gol
> >+-fpermitted-flt-eval-methods=@var{standard} -fno-function-cse @gol
> > -fno-guess-branch-probability -fno-inline -fno-math-errno -fno-peephole @gol
> > -fno-peephole2 -fno-sched-interblock -fno-sched-spec -fno-signed-zeros @gol
> > -fno-toplevel-reorder -fno-trapping-math -fno-zero-initialized-in-bss @gol
>
> This seems like totally the wrong place to document this.  The new
> option seems more like a code generation option than an optimization
> option, which is what the context of the above patch hunk is.
> Moreover, the list above is alphabetized and you're sticking the new
> option in a totally random place within the list.

Oh! Definitely a case of not seeing the wood for the trees there. All
I knew was that the option was a little bit like -fexcess-precision, but
I completely failed to spot that this list was alphabetised, or even to
see that the list related to optimisation options!

Sorry for that, I've moved it now to "C Language Options" - which as far as
I can tell is not in alphabetical order, I slotted it in after gnu89-inline
as that made sense to me, but I can move it again if you like. Is that
placement more acceptable?

Thanks,
James

---

gcc/c-family/

2016-11-09  James Greenhalgh  

* c-opts.c (c_common_post_options): Add logic to handle the default
case for -fpermitted-flt-eval-methods.

gcc/

2016-11-09  James Greenhalgh  

* common.opt (fpermitted-flt-eval-methods): New.
* doc/invoke.texi (-fpermitted-flt-eval-methods): Document it.
* flag_types.h (permitted_flt_eval_methods): New.

gcc/testsuite/

2016-11-09  James Greenhalgh  

* gcc.dg/fpermitted-flt-eval-methods_1.c: New.
* gcc.dg/fpermitted-flt-eval-methods_2.c: New.

diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
index de260e7..57717ff 100644
--- a/gcc/c-family/c-opts.c
+++ b/gcc/c-family/c-opts.c
@@ -788,6 +788,18 @@ c_common_post_options (const char **pfilename)
   && flag_unsafe_math_optimizations == 0)
 flag_fp_contract_mode = FP_CONTRACT_OFF;
 
+  /* If we are compiling C, and we are outside of a standards mode,
+ we can permit the new values from ISO/IEC TS 18661-3 for
+ FLT_EVAL_METHOD.  Otherwise, we must restrict the possible values to
+ the set specified in ISO C99/C11.  */
+  if (!flag_iso
+  && !c_dialect_cxx ()
+  && (global_options_set.x_flag_permitted_flt_eval_methods
+	  == PERMITTED_FLT_EVAL_METHODS_DEFAULT))
+flag_permitted_flt_eval_methods = PERMITTED_FLT_EVAL_METHODS_TS_18661;
+  else
+flag_permitted_flt_eval_methods = PERMITTED_FLT_EVAL_METHODS_C11;
+
   /* By default we use C99 inline semantics in GNU99 or C99 mode.  C99
  inline semantics are not supported in GNU89 or C89 mode.  */
   if (flag_gnu89_inline == -1)
diff --git a/gcc/common.opt b/gcc/common.opt
index 314145a..915c406 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1326,6 +1326,21 @@ Enum(excess_precision) String(fast) Value(EXCESS_PRECISION_FAST)
 EnumValue
 Enum(excess_precision) String(standard) Value(EXCESS_PRECISION_STANDARD)
 
+; Whether we permit the extended set of values for FLT_EVAL_METHOD
+; introduced in ISO/IEC TS 18661-3, or limit ourselves to those in C99/C11.
+fpermitted-flt-eval-methods=
+Common Joined RejectNegative Enum(permitted_flt_eval_methods) Var(flag_permitted_flt_eval_methods) Init(PERMITTED_FLT_EVAL_METHODS_DEFAULT)
+-fpermitted-flt-eval-methods=[c11|ts-18661]	Specify which values of FLT_EVAL_METHOD are permitted.
+
+Enum
+Name(permitted_flt_eval_methods) Type(enum permitted_flt_eval_methods) UnknownError(unknown specification for the set of FLT_EVAL_METHOD values to permit %qs)
+
+EnumValue
+Enum(permitted_flt_eval_methods) String(c11) Value(PERMITTED_FLT_EVAL_METHODS_C11)
+
+EnumValue
+Enum(permitted_flt_eval_methods) String(ts-18661-3) Value(PERMITTED_FLT_EVAL_METHODS_TS_18661)
+
 ffast-math
 Common Optimization
 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index eb89804..0ec10ba 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -178,6 +178,7 @@ in the following sections.
 @item C Language Options
 @xref{C Dialect Options,,Options Controlling C Dialect}.
 @gccoptlist{-ansi  -std=@var{standard}  -fgnu89-inline @gol
+-fpermitted-flt-eval-methods=@var{standard} @gol
 -aux-info @var{filename} -fallow-parameterless-variadic-functions @gol
 -fno-asm  -fno-bu

[RFC PATCH] Support -fsanitize=integer-arith-overflow even for vectors

2016-11-14 Thread Jakub Jelinek
Hi!

Working virtually out of Samoa.

The following patch is an attempt to handle -fsanitize=undefined
for vectors.  We already diagnose out of bounds accesses for vector
subscripts, this patch adds expansion for vector UBSAN_CHECK_* and generates
those in ubsan.  Haven't finished up the many vect elements handling (want
to emit a loop for code size).  Is this something we want for GCC 7?

--- gcc/ubsan.h.jj  2016-01-07 09:42:39.0 +0100
+++ gcc/ubsan.h 2016-11-14 10:47:19.887637713 +0100
@@ -52,7 +52,8 @@ extern tree ubsan_create_data (const cha
 extern tree ubsan_type_descriptor (tree, enum ubsan_print_style = 
UBSAN_PRINT_NORMAL);
 extern tree ubsan_encode_value (tree, bool = false);
 extern bool is_ubsan_builtin_p (tree);
-extern tree ubsan_build_overflow_builtin (tree_code, location_t, tree, tree, 
tree);
+extern tree ubsan_build_overflow_builtin (tree_code, location_t, tree, tree,
+ tree, tree *);
 extern tree ubsan_instrument_float_cast (location_t, tree, tree);
 extern tree ubsan_get_source_location_type (void);
 
--- gcc/internal-fn.c.jj2016-11-07 11:58:43.0 +0100
+++ gcc/internal-fn.c   2016-11-14 10:54:46.854918596 +0100
@@ -513,7 +513,7 @@ expand_ubsan_result_store (rtx target, r
 static void
 expand_addsub_overflow (location_t loc, tree_code code, tree lhs,
tree arg0, tree arg1, bool unsr_p, bool uns0_p,
-   bool uns1_p, bool is_ubsan)
+   bool uns1_p, bool is_ubsan, tree *datap)
 {
   rtx res, target = NULL_RTX;
   tree fn;
@@ -929,7 +929,7 @@ expand_addsub_overflow (location_t loc,
   /* Expand the ubsan builtin call.  */
   push_temp_slots ();
   fn = ubsan_build_overflow_builtin (code, loc, TREE_TYPE (arg0),
-arg0, arg1);
+arg0, arg1, datap);
   expand_normal (fn);
   pop_temp_slots ();
   do_pending_stack_adjust ();
@@ -958,7 +958,8 @@ expand_addsub_overflow (location_t loc,
 /* Add negate overflow checking to the statement STMT.  */
 
 static void
-expand_neg_overflow (location_t loc, tree lhs, tree arg1, bool is_ubsan)
+expand_neg_overflow (location_t loc, tree lhs, tree arg1, bool is_ubsan,
+tree *datap)
 {
   rtx res, op1;
   tree fn;
@@ -1024,7 +1025,7 @@ expand_neg_overflow (location_t loc, tre
   /* Expand the ubsan builtin call.  */
   push_temp_slots ();
   fn = ubsan_build_overflow_builtin (NEGATE_EXPR, loc, TREE_TYPE (arg1),
-arg1, NULL_TREE);
+arg1, NULL_TREE, datap);
   expand_normal (fn);
   pop_temp_slots ();
   do_pending_stack_adjust ();
@@ -1048,7 +1049,8 @@ expand_neg_overflow (location_t loc, tre
 
 static void
 expand_mul_overflow (location_t loc, tree lhs, tree arg0, tree arg1,
-bool unsr_p, bool uns0_p, bool uns1_p, bool is_ubsan)
+bool unsr_p, bool uns0_p, bool uns1_p, bool is_ubsan,
+tree *datap)
 {
   rtx res, op0, op1;
   tree fn, type;
@@ -1685,7 +1687,7 @@ expand_mul_overflow (location_t loc, tre
   /* Expand the ubsan builtin call.  */
   push_temp_slots ();
   fn = ubsan_build_overflow_builtin (MULT_EXPR, loc, TREE_TYPE (arg0),
-arg0, arg1);
+arg0, arg1, datap);
   expand_normal (fn);
   pop_temp_slots ();
   do_pending_stack_adjust ();
@@ -1734,6 +1736,81 @@ expand_mul_overflow (location_t loc, tre
 }
 }
 
+static void
+expand_vector_ubsan_overflow (location_t loc, enum tree_code code, tree lhs,
+ tree arg0, tree arg1)
+{
+  int cnt = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
+  rtx_code_label *done_label = NULL, *beg_label = NULL;
+  rtx cntvar = NULL_RTX;
+  tree eltype = TREE_TYPE (TREE_TYPE (arg0));
+  tree sz = TYPE_SIZE (eltype);
+  tree data = NULL_TREE;
+
+  if (cnt > 4)
+{
+  do_pending_stack_adjust ();
+  done_label = gen_label_rtx ();
+  beg_label = gen_label_rtx ();
+  cntvar = gen_reg_rtx (word_mode);
+  emit_move_insn (cntvar, const0_rtx);
+  emit_label (beg_label);
+}
+  for (int i = 0; i < (cnt > 4 ? 1 : cnt); i++)
+{
+  tree op0, op1;
+  if (cnt > 4)
+   {
+ gcc_unreachable ();
+   }
+  else
+   {
+ tree bitpos = bitsize_int (tree_to_uhwi (sz) * i);
+ op0 = fold_build3 (BIT_FIELD_REF, eltype, arg0, sz, bitpos);
+ op1 = fold_build3 (BIT_FIELD_REF, eltype, arg1, sz, bitpos);
+   }
+  switch (code)
+   {
+   case PLUS_EXPR:
+ expand_addsub_overflow (loc, PLUS_EXPR, NULL_TREE, op0, op1,
+ false, false, false, true, &data);
+ break;
+   case MINUS_EXPR:
+ if (integer_zerop (op0))
+   expand_neg_overflow (loc, NULL_TREE, op1, true

Re: [PATCH] loop distribution bug fix

2016-11-14 Thread Richard Biener
On Fri, Nov 11, 2016 at 7:55 PM, Jim Wilson  wrote:
> On Thu, Nov 10, 2016 at 2:53 AM, Richard Biener
>  wrote:
>> The biggest "lack" of loop distribution is the ability to undo CSE so for
>
> I hadn't noticed this problem yet.  I will have to take a look.
>
>> Then of course the cost model is purely modeled for STREAM (reduce the number
>> of memory streams).  So loop distribution is expected to pessimize code for
>> the CSE case in case you are not memory bound and improve things if you
>> are memory bound.
>
> I noticed this problem.  I think loop distribution should be callable
> from inside the vectorizer or vice versa.  If a loop can't be
> vectorized, but distributing the loop allows the sub loops to be
> vectorized, then we should go ahead and distribute, even if that
> increases the number of memory streams slightly, as the gain from
> vectorizing should be greater than the loss from the additional memory
> streams.  We could have a cost model that tries to compute the
> gain/loss here and make a better decision of when to distribute to
> increase vectorization at the expense of the number of memory streams.
> This looks like a major project though, and may be more work than I
> have time for.

Yes.  That's true for most enabling transforms (an easier one that comes to
my mind is final value replacement, which, when required from the vectorizer
could use a different cost model).

Richard.

> Jim


Re: [PATCH] df: Change defs in entry and uses in exit block during separate shrink-wrapping

2016-11-14 Thread Richard Biener
On Sat, Nov 12, 2016 at 9:31 AM, Segher Boessenkool
 wrote:
> So far all target implementations of the separate shrink-wrapping hooks
> use the DF LIVE info to figure out around which basic blocks the non-
> volatile registers need to be saved.  This is done by looking at the
> IN+GEN+KILL sets of the basic blocks.  However, that doesn't work for
> registers that DF says are defined in the entry block, or used in the
> exit block.
>
> This patch introduces a shrink_wrap_separate_in_progress variable, and
> makes df_get_entry_block_def_set and df_get_exit_block_use_set set the
> respective sets to empty if that variable is set to true.  It also
> changes the rs6000 port to use IN+GEN+KILL for the LR component.
>
> [  is an older
> version of this, using a different (much inferior) approach. ]
>
> Tested on powerpc64-linux {-m32,-m64}.  Is this okay for trunk?

Globals like this are somewhat gross.  There are df_changeable_flags
where we seem to have a "related" flag DF_RD_PRUNE_DEAD_DEFS
so you could add a flag for this.  There's also local_flags (only used
by df_chain_add_problem).

Richard.

>
> Segher
>
>
> 2016-11-12  Segher Boessenkool  
>
> * config/rs6000/rs6000.c (rs6000_components_for_bb): Mark the LR
> component as used also if LR_REGNO is a live input to the bb.
> * df-scan.c (df_get_entry_block_def_set): Return immediately after
> clearing the set if shrink_wrap_separate_in_progress.
> (df_get_exit_block_use_set): Ditto.
> * rtl.h (shrink_wrap_separate_in_progress): Declare new variable.
> * shrink-wrap.c (shrink_wrap_separate_in_progress): New variable.
> (try_shrink_wrapping_separate): Set shrink_wrap_separate_in_progress
> and call df_update_entry_block_defs and df_update_exit_block_uses
> at the start; clear that variable and call those functions at the end.
>
> ---
>  gcc/config/rs6000/rs6000.c |  3 ++-
>  gcc/df-scan.c  | 16 
>  gcc/rtl.h  |  3 +++
>  gcc/shrink-wrap.c  | 12 
>  4 files changed, 33 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 49985f1..8d6b2d5 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -27714,7 +27714,8 @@ rs6000_components_for_bb (basic_block bb)
>bitmap_set_bit (components, regno);
>
>/* LR needs to be saved around a bb if it is killed in that bb.  */
> -  if (bitmap_bit_p (gen, LR_REGNO)
> +  if (bitmap_bit_p (in, LR_REGNO)
> +  || bitmap_bit_p (gen, LR_REGNO)
>|| bitmap_bit_p (kill, LR_REGNO))
>  bitmap_set_bit (components, 0);
>
> diff --git a/gcc/df-scan.c b/gcc/df-scan.c
> index 7cfd34b..398842b 100644
> --- a/gcc/df-scan.c
> +++ b/gcc/df-scan.c
> @@ -3506,6 +3506,14 @@ df_get_entry_block_def_set (bitmap entry_block_defs)
>
>bitmap_clear (entry_block_defs);
>
> +  /* For separate shrink-wrapping we use LIVE to analyze which basic blocks
> + need a prologue for some component to be executed before that block,
> + and we do not care about any other registers.  Hence, we do not want
> + any register for any component defined in the entry block, and we can
> + just leave all registers undefined.  */
> +  if (shrink_wrap_separate_in_progress)
> +return;
> +
>for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
>  {
>if (global_regs[i])
> @@ -3665,6 +3673,14 @@ df_get_exit_block_use_set (bitmap exit_block_uses)
>
>bitmap_clear (exit_block_uses);
>
> +  /* For separate shrink-wrapping we use LIVE to analyze which basic blocks
> + need an epilogue for some component to be executed after that block,
> + and we do not care about any other registers.  Hence, we do not want
> + any register for any component seen as used in the exit block, and we
> + can just say no registers at all are used.  */
> +  if (shrink_wrap_separate_in_progress)
> +return;
> +
>/* Stack pointer is always live at the exit.  */
>bitmap_set_bit (exit_block_uses, STACK_POINTER_REGNUM);
>
> diff --git a/gcc/rtl.h b/gcc/rtl.h
> index 3bb6a22..d1409bc 100644
> --- a/gcc/rtl.h
> +++ b/gcc/rtl.h
> @@ -3468,6 +3468,9 @@ extern int lra_in_progress;
>
>  #define can_create_pseudo_p() (!reload_in_progress && !reload_completed)
>
> +/* Set to true during separate shrink-wrapping.  */
> +extern bool shrink_wrap_separate_in_progress;
> +
>  #ifdef STACK_REGS
>  /* Nonzero after end of regstack pass.
> Set to 1 or 0 by reg-stack.c.  */
> diff --git a/gcc/shrink-wrap.c b/gcc/shrink-wrap.c
> index e480d4d..dd0cae1 100644
> --- a/gcc/shrink-wrap.c
> +++ b/gcc/shrink-wrap.c
> @@ -1764,6 +1764,10 @@ insert_prologue_epilogue_for_components (sbitmap 
> components)
>commit_edge_insertions ();
>  }
>
> +/* Used by DF: if true, the entry block defines no registers, and the exit
> +   block uses none.  */
> +bool shrink_wrap_separat

Re: [patch] Disable LTO note about strict aliasing

2016-11-14 Thread Richard Biener
On Sun, Nov 13, 2016 at 11:31 PM, Eric Botcazou  wrote:
> It's the note issued by the -Wlto-type-mismatch warning:
>
> q.ads:7:13: warning: type of 'q__proc' does not match original declaration [-
> Wlto-type-mismatch]
>procedure Proc (A : Arr);
>  ^
> q.adb:7:3: note: 'q__proc' was previously declared here
>procedure Proc (A : Arr) is begin null; end;
>^
> q.adb:7:3: note: code may be misoptimized unless -fno-strict-aliasing is used
>
> and it's a bit surprising that -fno-strict-aliasing cannot silence it.

The issue is that we can have different -fstrict-aliasing status on
different functions
(from individual TUs).  I'm not sure if a -fno-strict-aliasing at
link/WPA time will
put them to that state.

> Tested on x86_64-suse-linux, OK for the mainline and 6 branch?

Can you verify that a TU compiled with -fstrict-aliasing will be treated
as if it had been compiled with -fno-strict-aliasing
if -fno-strict-aliasing is specified at link time?  We're also still
in a very weird state
of fixing TU compile flags via the optimize/target attribute to link
time while at the
same time still going through lto-opts.c and lto-wrapper "merging" of
those options.

That said, -Wno-lto-type-mismatch can be used to disable the warning as well.

Thanks,
Richard.

>
> 2016-11-13  Eric Botcazou  
>
> lto/
> * lto-symtab.c (lto_symtab_merge_decls_2): Only issue note on strict
> aliasing if -fstrict-aliasing is enabled.
>
> --
> Eric Botcazou


RE: [MIPS] Enable descriptors for nested functions in Ada

2016-11-14 Thread Matthew Fortune
Eric Botcazou  writes:
> Similarly to x86, PowerPC and SPARC, this enables the use of custom run-
> time descriptors in Ada, thus eliminating the need for trampolines and
> executable stack in presence of pointers to nested functions.
> 
> Unfortunately I don't have access to MIPS hardware any more, but the
> scheme was tested on the architecture at some point.  OK for the
> mainline?
> 
> 
> 2016-11-13  Eric Botcazou  
> 
> PR ada/67205
>   * config/mips/mips.c (TARGET_CUSTOM_FUNCTION_DESCRIPTORS): Define.

Hi Eric,

Thanks for the patch. I'm a bit concerned about the interaction this
will have with microMIPS which can (albeit not implemented today) use
2-byte alignment on function entry points.

Is the solution for other targets to mandate 4-byte alignment when
using function descriptors?

If so then I don't see a problem with this. We will have to account for
that when GCC allows 2-byte aligned microMIPS functions.

Thanks,
Matthew




[PATCH] Fix PR78312

2016-11-14 Thread Richard Biener

backprop changes values of SSA names - while it properly handles
debug info it fails to reset range/align info.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2016-11-14  Richard Biener  

PR tree-optimization/78312
* gimple-ssa-backprop.c (backprop::prepare_change): Reset
flow-sensitive info.

* gcc.dg/torture/pr78312.c: New testcase.

Index: gcc/gimple-ssa-backprop.c
===
--- gcc/gimple-ssa-backprop.c   (revision 242066)
+++ gcc/gimple-ssa-backprop.c   (working copy)
@@ -728,6 +728,7 @@ backprop::prepare_change (tree var)
 {
   if (MAY_HAVE_DEBUG_STMTS)
 insert_debug_temp_for_var_def (NULL, var);
+  reset_flow_sensitive_info (var);
 }
 
 /* STMT has been changed.  Give the fold machinery a chance to simplify
Index: gcc/testsuite/gcc.dg/torture/pr78312.c
===
--- gcc/testsuite/gcc.dg/torture/pr78312.c  (revision 0)
+++ gcc/testsuite/gcc.dg/torture/pr78312.c  (working copy)
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+
+typedef unsigned short u16;
+
+static u16 a;
+
+u16 __attribute__ ((noinline, noclone))
+foo (int p1)
+{
+  a = -(p1 > 0);
+  a *= 0 != a;
+  a *= (unsigned)a;
+  return a;
+}
+
+int
+main ()
+{
+  u16 x = foo (1);
+  if (x != 1)
+__builtin_abort();
+  return 0;
+}


Re: [PATCH] [ARC] New option handling, refurbish multilib support.

2016-11-14 Thread Andrew Burgess
* Claudiu Zissulescu  [2016-11-10 12:02:34 
+0100]:

> Hi,
> 
> Please find the revised patch which includes the refurbishing of
> mmpy-option option, and a new comment on DEFAULT_arc_fpu_build
> define. As for the last suggestion, my proposal is to have a later
> patch on the topic of .cpu, synced with a related binutils patch.

I don't understand this last point.

HEAD binutils supports '.cpu NPS400' and HEAD GCC also correctly emits
'.cpu NPS400' (when -mcpu=nps400 is passed).  After your change we no
longer correctly emit '.cpu NPS400'.

I don't understand what syncing is required with binutils?

Thanks,
Andrew




> 
> OK to apply?
> Claudiu
> 
> gcc/
> 2016-05-09  Claudiu Zissulescu  
> 
>   * config/arc/arc-arch.h: New file.
>   * config/arc/arc-arches.def: Likewise.
>   * config/arc/arc-cpus.def: Likewise.
>   * config/arc/arc-options.def: Likewise.
>   * config/arc/t-multilib: Likewise.
>   * config/arc/genmultilib.awk: Likewise.
>   * config/arc/genoptions.awk: Likewise.
>   * config/arc/arc-tables.opt: Likewise.
>   * config/arc/driver-arc.c: Likewise.
>   * common/config/arc/arc-common.c (arc_handle_option): Trace
>   toggled options.
>   * config.gcc (arc*-*-*): Add arc-tables.opt to arc's extra
>   options; check for supported cpu against arc-cpus.def file.
>   (arc*-*-elf*, arc*-*-linux-uclibc*): Use new make fragment; define
>   TARGET_CPU_BUILD macro; add driver-arc.o as an extra object.
>   * config/arc/arc-c.def: Add emacs local variables.
>   * config/arc/arc-opts.h (processor_type): Use arc-cpus.def file.
>   (FPU_FPUS, FPU_FPUD, FPU_FPUDA, FPU_FPUDA_DIV, FPU_FPUDA_FMA)
>   (FPU_FPUDA_ALL, FPU_FPUS_DIV, FPU_FPUS_FMA, FPU_FPUS_ALL)
>   (FPU_FPUD_DIV, FPU_FPUD_FMA, FPU_FPUD_ALL): New defines.
>   (DEFAULT_arc_fpu_build): Define.
>   (DEFAULT_arc_mpy_option): Define.
>   * config/arc/arc-protos.h (arc_init): Delete.
>   * config/arc/arc.c (arc_cpu_name): New variable.
>   (arc_selected_cpu, arc_selected_arch, arc_arcem, arc_archs)
>   (arc_arc700, arc_arc600, arc_arc601): New variable.
>   (arc_init): Add static; remove selection of default tune value,
>   cleanup obsolete error messages.
>   (arc_override_options): Make use of .def files for selecting the
>   right cpu and option configurations.
>   * config/arc/arc.h (stdbool.h): Include.
>   (TARGET_CPU_DEFAULT): Define.
>   (CPP_SPEC): Remove mcpu=NPS400 handling.
>   (arc_cpu_to_as): Declare.
>   (EXTRA_SPEC_FUNCTIONS): Define.
>   (OPTION_DEFAULT_SPECS): Likewise.
>   (ASM_DEFAULT): Remove.
>   (ASM_SPEC): Use arc_cpu_to_as.
>   (DRIVER_SELF_SPECS): Remove deprecated options.
>   (arc_base_cpu): Declare.
>   (TARGET_ARC600, TARGET_ARC601, TARGET_ARC700, TARGET_EM)
>   (TARGET_HS, TARGET_V2, TARGET_ARC600): Make them use arc_base_cpu
>   variable.
>   (MULTILIB_DEFAULTS): Use ARC_MULTILIB_CPU_DEFAULT.
>   * config/arc/arc.md (attr_cpu): Remove.
>   * config/arc/arc.opt (mno-mpy): Deprecate.
>   (mcpu=ARC600, mcpu=ARC601, mcpu=ARC700, mcpu=NPS400, mcpu=ARCEM)
>   (mcpu=ARCHS): Remove.
>   (mcrc, mdsp-packa, mdvbf, mmac-d16, mmac-24, mtelephony, mrtsc):
>   Deprecate.
>   (mbarrel_shifte, mspfp_, mdpfp_, mdsp_pack, mmac_): Remove.
>   (arc_fpu): Use new defines.
>   (mpy-option): Change to use numeric or string like inputs.
>   * config/arc/t-arc (driver-arc.o): New target.
>   (arc-cpus, t-multilib, arc-tables.opt): Likewise.
>   * config/arc/t-arc-newlib: Delete.
>   * config/arc/t-arc-uClibc: Renamed to t-uClibc.
>   * doc/invoke.texi (ARC): Update arc options.
> ---
>  gcc/common/config/arc/arc-common.c |  69 -
>  gcc/config.gcc |  47 +
>  gcc/config/arc/arc-arch.h  | 120 ++
>  gcc/config/arc/arc-arches.def  |  56 ++
>  gcc/config/arc/arc-c.def   |   4 +
>  gcc/config/arc/arc-cpus.def|  75 ++
>  gcc/config/arc/arc-options.def | 109 
>  gcc/config/arc/arc-opts.h  |  49 +++--
>  gcc/config/arc/arc-protos.h|   1 -
>  gcc/config/arc/arc-tables.opt  |  90 
>  gcc/config/arc/arc.c   | 176 +---
>  gcc/config/arc/arc.h   |  89 
>  gcc/config/arc/arc.md  |   5 -
>  gcc/config/arc/arc.opt | 169 +++---
>  gcc/config/arc/driver-arc.c|  78 ++
>  gcc/config/arc/genmultilib.awk | 203 
> +
>  gcc/config/arc/genoptions.awk  |  86 
>  gcc/config/arc/t-arc   |  19 
>  gcc/config/arc/t-arc-newlib|  46 -
>  gcc/config/arc/t-arc-uClibc|  20 
>  gcc/config/arc/t-multilib  |  34 +++
>  gcc/config/arc/t-uClibc  

RE: [PATCH 0/3] MIPS/GCC: Changes for `.insn' assembly annotation

2016-11-14 Thread Matthew Fortune
Maciej Rozycki  writes:
>  This small patch series addresses an issue uncovered by a recent
> binutils master branch update, scheduled for the upcoming 2.28 release,
> where we fail to annotate stray code labels -- generally produced for
> code marked as unreachable -- that have no instruction following, with
> the `.insn'
> pseudo-op, as explicitly required with the microMIPS ISA and also needed
> with MIPS16 code.  The missing annotation causes assembly failures if
> such a label is a target of a branch.

Thanks for the patches.

Obviously this is going to introduce another dependency between GCC and
binutils releases where new binutils will cause older GCC to stop working.
While there is some justification to this, given the new binutils
behaviour is to add safety, I expect we will see users hit this issue
and want newer binutils with older GCC.

At some point in the future we may therefore have to consider backports
of this work to pre-existing GCC releases or a configure time check in
binutils to relax the new checks if GCC does not have the .insn fix. I
don't know how reliable the latter will be, but it may be possible.

Thanks,
Matthew


RE: [PATCH] [ARC] New option handling, refurbish multilib support.

2016-11-14 Thread Claudiu Zissulescu
> HEAD binutils supports '.cpu NPS400' and HEAD GCC also correctly emits
> '.cpu NPS400' (when -mcpu=nps400 is passed).  After your change we no
> longer correctly emit '.cpu NPS400'.
> 

Sorry for this misunderstanding. Updating the patch to emit .cpu NPS400.

Thanks,
Claudiu


RE: [PATCH 1/3] MIPS/GCC/test: Implement `-mmicromips' option test

2016-11-14 Thread Matthew Fortune
Maciej Rozycki  writes:
>   gcc/testsuite/
>   * gcc.target/mips/mips.exp (mips_option_tests): Add
>   `-mmicromips' array element.

OK, thanks.

Matthew


RE: [PATCH 2/3] MIPS/GCC/test: Implement `-mcode-readable=yes' option test

2016-11-14 Thread Matthew Fortune
Maciej Rozycki  writes:
>   gcc/testsuite/
>   * gcc.target/mips/mips.exp (mips_option_tests): Add
>   `-mcode-readable=yes' array element.
> ---
>  OK to apply?

OK, thanks.

Matthew


Re: [PATCH] DWARF: make signedness explicit for enumerator const values

2016-11-14 Thread Pierre-Marie de Rodat

Mark,

Thank you for your answer!

On 11/10/2016 01:38 PM, Mark Wielaard wrote:

IMHO having an explicit DW_AT_type pointing at the base type with size
and encoding for the DW_TAG_enumeration_type is better for consumers than
having to try and interpret the DW_FORM used to encode the values.


I’m curious about why you think this alternative is better for 
consumers: after all, they always have to interpret the DW_FORM anyway 
in order to decode the DIE stream. At least this goes against the DWARF 
standard’s “strong” recommendation: section 7.5.4 Attribute Encodings says:


If one of the DW_FORM_data<n> forms is used to represent a signed
or unsigned integer, it can be hard for a consumer to discover the
context necessary to determine which interpretation is intended.
Producers are therefore strongly encouraged to use DW_FORM_sdata or
DW_FORM_udata for signed and unsigned integers respectively, rather
than DW_FORM_data<n>.


Alternatively could we just attach a DW_AT_encoding to the
DW_TAG_enumeration_type? The spec doesn't list it as one of the
attributes for an enumeration_type, but it makes sense given it already
carries bit/byte size attributes.


I agree it would make sense, but would it be acceptable to enable this 
even in strict mode? If not, I’d prefer to stick to a solution that 
would apply everywhere. :-)


--
Pierre-Marie de Rodat


[PATCH] vimrc: fix TAB settings

2016-11-14 Thread Martin Liška
Hello.

Following patch adds TAB settings to contrib/vimrc file.
Hope it looks reasonable?

Thanks,
Martin
>From 84eb32c4e84b87d690033f7505b8570427ab8468 Mon Sep 17 00:00:00 2001
From: marxin 
Date: Sun, 13 Nov 2016 12:05:48 +0100
Subject: [PATCH] vimrc: fix TAB settings

---
 contrib/vimrc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/contrib/vimrc b/contrib/vimrc
index 34e8f35..7c0c587 100644
--- a/contrib/vimrc
+++ b/contrib/vimrc
@@ -35,7 +35,10 @@ function! SetStyle()
   let l:c_exts = ['c', 'h', 'cpp', 'cc', 'C', 'H', 'def', 'java']
   if index(l:c_exts, l:ext) != -1
 setlocal cindent
+setlocal tabstop=8
 setlocal softtabstop=2
+setlocal shiftwidth=2
+setlocal noexpandtab
 setlocal cinoptions=>4,n-2,{2,^-2,:2,=2,g0,f0,h2,p4,t0,+2,(0,u0,w1,m0
 setlocal textwidth=80
 setlocal formatoptions-=ro formatoptions+=cqlt
-- 
2.10.1



Re: [PATCH] PR fortran/78300 -- class procedure as actual arg

2016-11-14 Thread Janus Weil
2016-11-14 9:56 GMT+01:00 Janus Weil :
> Hi Steve,
>
>> The attach patch allows a procedure with a class result to
>> be an actual argument to a subprogram where the dummy argument
>> is expected to be a class.  OK to commit?
>
> that patch actually does not look quite right to me. Does it survive a 
> regtest?
>
> I think one should rather check why the class_ok attribute is not set
> in the first place, and maybe apply a fix in gfc_build_class_symbol.

After looking into this a little bit more, I found that the culprit
seems to be 'resolve_procedure_interface', which does not properly
copy the 'class_ok' attribute. I propose the attached patch to fix
this (regtesting right now) ...

Cheers,
Janus
Index: gcc/fortran/resolve.c
===
--- gcc/fortran/resolve.c   (Revision 242380)
+++ gcc/fortran/resolve.c   (Arbeitskopie)
@@ -214,27 +214,33 @@ resolve_procedure_interface (gfc_symbol *sym)
   if (ifc->result)
{
  sym->ts = ifc->result->ts;
+ sym->attr.allocatable = ifc->result->attr.allocatable;
+ sym->attr.pointer = ifc->result->attr.pointer;
+ sym->attr.dimension = ifc->result->attr.dimension;
+  sym->attr.class_ok = ifc->result->attr.class_ok;
+ sym->as = gfc_copy_array_spec (ifc->result->as);
  sym->result = sym;
}
   else
-   sym->ts = ifc->ts;
+   {
+ sym->ts = ifc->ts;
+ sym->attr.allocatable = ifc->attr.allocatable;
+ sym->attr.pointer = ifc->attr.pointer;
+ sym->attr.dimension = ifc->attr.dimension;
+ sym->attr.class_ok = ifc->attr.class_ok;
+ sym->as = gfc_copy_array_spec (ifc->as);
+   }
   sym->ts.interface = ifc;
   sym->attr.function = ifc->attr.function;
   sym->attr.subroutine = ifc->attr.subroutine;
 
-  sym->attr.allocatable = ifc->attr.allocatable;
-  sym->attr.pointer = ifc->attr.pointer;
   sym->attr.pure = ifc->attr.pure;
   sym->attr.elemental = ifc->attr.elemental;
-  sym->attr.dimension = ifc->attr.dimension;
   sym->attr.contiguous = ifc->attr.contiguous;
   sym->attr.recursive = ifc->attr.recursive;
   sym->attr.always_explicit = ifc->attr.always_explicit;
   sym->attr.ext_attr |= ifc->attr.ext_attr;
   sym->attr.is_bind_c = ifc->attr.is_bind_c;
-  sym->attr.class_ok = ifc->attr.class_ok;
-  /* Copy array spec.  */
-  sym->as = gfc_copy_array_spec (ifc->as);
   /* Copy char length.  */
   if (ifc->ts.type == BT_CHARACTER && ifc->ts.u.cl)
{


RE: [PATCH 3/3] MIPS/GCC: Mark trailing labels with `.insn'

2016-11-14 Thread Matthew Fortune
Maciej Rozycki  writes:
> This however requires the correct annotation of branch targets as code,
> because the ISA mode is not relevant for data symbols and is therefore
> not recorded for them. 

I wonder if it would have been possible to add the ISA mode to data
symbols and hide it in readelf/objdump? I don't know what older binutils
would have done with such labels but it would have made the new checks
compatible with pre-existing GCC code generation. Regardless the changes
in this patch would still be important to correctly identify labels as
text.

> Let it be produced then, making it appear in output generated right
> after `$L2' definitions above and thus fixing the assembly.  Use the
> `mach2' pass, after all the MIPS16 constant pools have been fixed, to
> scan the insn stream backwards, identifying any labels still present at
> the end of a function or immediately preceding a MIPS16 constant pool,
> using dummy `consttable' insns previously inserted to identify the
> beginning of each such constant pool.  Insert the `insn_pseudo' insn
> immediately after these labels, which emits the `.insn' pseudo-op.
> 
> References:
> 
> [1] "MIPS Architecture for Programmers, Volume II-B: The microMIPS32
> Instruction Set", MIPS Technologies, Inc., Document Number: MD00582,
> Revision 5.04, January 15, 2014, Section 7.1 "Assembly-Level
> Compatibility", p. 533
> 
> [2] "MIPS Architecture for Programmers, Volume II-B: The microMIPS64
> Instruction Set", MIPS Technologies, Inc., Document Number: MD00594,
> Revision 5.04, January 15, 2014, Section 8.1 "Assembly-Level
> Compatibility", p. 623
> 
>   gcc/
>   * config/mips/mips.c (mips16_emit_constants): Emit `consttable'
>   insn at the beginning of the constant pool.
>   (mips_insert_insn_pseudos): New function.
>   (mips_machine_reorg2): Call it.
>   * config/mips/mips.md (unspec): Add UNSPEC_CONSTTABLE and
>   UNSPEC_INSN_PSEUDO enum values.
>   (insn_pseudo, consttable): New insns.
> 
>   gcc/testsuite/
>   * gcc.target/mips/insn-casesi.c: New test case.
>   * gcc.target/mips/insn-pseudo-1.c: New test case.
>   * gcc.target/mips/insn-pseudo-2.c: New test case.
>   * gcc.target/mips/insn-pseudo-3.c: New test case.
>   * gcc.target/mips/insn-pseudo-4.c: New test case.
>   * gcc.target/mips/insn-tablejump.c: New test case.
> ---
>  I have successfully regression-tested it with the `mips-mti-linux-gnu'
> target, with a big-endian o32 regular MIPS multilib and a little-endian
> o32 MIPS16 multilib, with no regressions, except as noted below.  I did
> some big-endian n64 regular MIPS and little-endian o32 microMIPS
> testing, including with the new cases, and things looked good, except as
> noted below.  I also generated assembly manually (for the assembly-match
> cases) and examined output visually, including all the four above
> multilibs, and also -fPIC and -mno-abicalls variants, which I have no
> immediate way of testing automatically.

As noted below and my opinion in general... Dealing with the intricacies
of getting the MIPS part of the GCC testsuite running cleanly for all
variations of the architecture is a prohibitively expensive process to
apply to each patch. Now that we are in stage 3 then various testsuite
issues will get dealt with.

>  With n64 (`-mabi=n64') testing none of the test cases under
> gcc.target/mips/ were run and the test harness broke as follows:
> 
> ERROR: (DejaGnu) proc "cc1: error: '-mfpxx' can only be used with the
> o32 ABI" does not exist.
> The error code is NONE
> The info on the error is:
> invalid command name "cc1:"
> while executing
> "::tcl_unknown cc1: error: '-mfpxx' can only be used with the o32 ABI"
> ("uplevel" body line 1)
> invoked from within
> "uplevel 1 ::tcl_unknown $args"
> 
> I take it as a bug in the harness, which ought to be looked into
> separately, and not a problem with this change.

I haven't seen this failure before which may be down to a different way
of invoking the testsuite I guess (I have run n64 testing fairly recently).

>  With MIPS16 (`-mips16') and microMIPS (`-mmicromips') testing the test
> cases failed to compile as follows, respectively:
> 
> spawn -ignore SIGHUP .../gcc/gcc/xgcc -B.../gcc/gcc/
> .../gcc/testsuite/gcc.target/mips/insn-tablejump.c -fno-diagnostics-
> show-caret -fdiagnostics-color=never --sysroot=.../sysroot -O0 -
> DNOMIPS16=__attribute__((nomips16)) -
> DNOMICROMIPS=__attribute__((nomicromips)) -
> DNOCOMPRESSION=__attribute__((nocompression)) -mmicromips -mno-mips16 -
> DMICROMIPS=__attribute__((micromips)) -EL -mips16 -Wl,-dynamic-
> linker,.../sysroot/mipsel-r2-mips16-hard/lib/ld.so.1 -Wl,-
> rpath,.../sysroot/mipsel-r2-mips16-hard/lib -Wl,-
> rpath,.../sysroot/mipsel-r2-mips16-hard/usr/lib -lm -o insn-
> tablejump.exe
> cc1: error: unsupported combination: -mips16 -mmicromips compiler exited
> with status 1 output is:
> cc1: error: unsupported combination: -mips16 -mmicr

Re: [PATCH] PR fortran/78300 -- class procedure as actual arg

2016-11-14 Thread Janus Weil
> After looking into this a little bit more, I found that the culprit
> seems to be 'resolve_procedure_interface', which does not properly
> copy the 'class_ok' attribute. I propose the attached patch to fix
> this (regtesting right now) ...

The regtest finished successfully. Is that patch ok for trunk?

Cheers,
Janus


[PATCH] remove conditional compilation of HAVE_AS_LEB128 code

2016-11-14 Thread tbsaunde+gcc
From: tbsaunde 

Last patch I'm squeezing in for stage 1.  Jeff approved this back in September,
but I never committed it for some reason.  So I updated it to trunk,
rebootstrapped and regtested and committed it.

Thanks!

Trev

gcc/ChangeLog:

2016-08-20  Trevor Saunders  

* acinclude.m4 (gcc_GAS_CHECK_FEATURE): Support doing an action
if the feature isn't available.
* configure: Regenerate.
* configure.ac: define HAVE_AS_LEB128 to 0 when not available.
* dwarf2asm.c (dw2_asm_output_data_uleb128): Always compile code
for HAVE_AS_LEB128.
(dw2_asm_output_data_sleb128): Likewise.
(dw2_asm_output_delta_uleb128): Likewise.
(dw2_asm_output_delta_sleb128): Likewise.
* except.c (output_one_function_exception_table): Likewise.
(dw2_size_of_call_site_table): Likewise.
(sjlj_size_of_call_site_table): Likewise.
* dwarf2out.c (output_loc_list): Likewise.
(output_rnglists): Likewise.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@242381 
138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/acinclude.m4 |   4 ++
 gcc/configure| 106 +++-
 gcc/configure.ac |   2 +
 gcc/dwarf2asm.c  | 184 +++
 gcc/dwarf2out.c  | 153 ++---
 gcc/except.c | 162 
 6 files changed, 357 insertions(+), 254 deletions(-)

diff --git a/gcc/acinclude.m4 b/gcc/acinclude.m4
index 38dd899..791f2a7 100644
--- a/gcc/acinclude.m4
+++ b/gcc/acinclude.m4
@@ -550,6 +550,10 @@ AC_CACHE_CHECK([assembler for $1], [$2],
 ifelse([$7],,,[dnl
 if test $[$2] = yes; then
   $7
+fi])
+ifelse([$8],,,[dnl
+if test $[$2] != yes; then
+  $8
 fi])])
 
 dnl gcc_SUN_LD_VERSION
diff --git a/gcc/configure b/gcc/configure
index 0f04033..197a152 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -22432,6 +22432,7 @@ $as_echo "#define HAVE_GAS_BALIGN_AND_P2ALIGN 1" 
>>confdefs.h
 
 fi
 
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for .p2align with 
maximum skip" >&5
 $as_echo_n "checking assembler for .p2align with maximum skip... " >&6; }
 if test "${gcc_cv_as_max_skip_p2align+set}" = set; then :
@@ -22467,6 +22468,7 @@ $as_echo "#define HAVE_GAS_MAX_SKIP_P2ALIGN 1" 
>>confdefs.h
 
 fi
 
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for .literal16" >&5
 $as_echo_n "checking assembler for .literal16... " >&6; }
 if test "${gcc_cv_as_literal16+set}" = set; then :
@@ -22502,6 +22504,7 @@ $as_echo "#define HAVE_GAS_LITERAL16 1" >>confdefs.h
 
 fi
 
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for working 
.subsection -1" >&5
 $as_echo_n "checking assembler for working .subsection -1... " >&6; }
 if test "${gcc_cv_as_subsection_m1+set}" = set; then :
@@ -22549,6 +22552,7 @@ $as_echo "#define HAVE_GAS_SUBSECTION_ORDERING 1" 
>>confdefs.h
 
 fi
 
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for .weak" >&5
 $as_echo_n "checking assembler for .weak... " >&6; }
 if test "${gcc_cv_as_weak+set}" = set; then :
@@ -22584,6 +22588,7 @@ $as_echo "#define HAVE_GAS_WEAK 1" >>confdefs.h
 
 fi
 
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for .weakref" >&5
 $as_echo_n "checking assembler for .weakref... " >&6; }
 if test "${gcc_cv_as_weakref+set}" = set; then :
@@ -22619,6 +22624,7 @@ $as_echo "#define HAVE_GAS_WEAKREF 1" >>confdefs.h
 
 fi
 
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for .nsubspa 
comdat" >&5
 $as_echo_n "checking assembler for .nsubspa comdat... " >&6; }
 if test "${gcc_cv_as_nsubspa_comdat+set}" = set; then :
@@ -22655,6 +22661,7 @@ $as_echo "#define HAVE_GAS_NSUBSPA_COMDAT 1" 
>>confdefs.h
 
 fi
 
+
 # .hidden needs to be supported in both the assembler and the linker,
 # because GNU LD versions before 2.12.1 have buggy support for STV_HIDDEN.
 # This is irritatingly difficult to feature test for; we have to check the
@@ -22702,6 +22709,7 @@ fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_hidden" >&5
 $as_echo "$gcc_cv_as_hidden" >&6; }
 
+
 case "${target}" in
   *-*-darwin*)
 # Darwin as has some visibility support, though with a different syntax.
@@ -23157,6 +23165,11 @@ if test $gcc_cv_as_leb128 = yes; then
 $as_echo "#define HAVE_AS_LEB128 1" >>confdefs.h
 
 fi
+if test $gcc_cv_as_leb128 != yes; then
+
+$as_echo "#define HAVE_AS_LEB128 0" >>confdefs.h
+
+fi
 
 # Check if we have assembler support for unwind directives.
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for cfi 
directives" >&5
@@ -23236,6 +23249,7 @@ fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_cfi_directive" >&5
 $as_echo "$gcc_cv_as_cfi_directive" >&6; }
 
+
 if test $gcc_cv_as_cfi_directive = yes && test x$gcc_cv_objdump != x; then
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for working cfi 
advance" >&5
 $as_echo_n "checking assembler for working cfi a

RE: [PATCH 0/3] MIPS/GCC: Changes for `.insn' assembly annotation

2016-11-14 Thread Maciej W. Rozycki
On Mon, 14 Nov 2016, Matthew Fortune wrote:

> At some point in the future we may therefore have to consider backports
> of this work to pre-existing GCC releases or a configure time check in
> binutils to relax the new checks if GCC does not have the .insn fix. I
> don't know how reliable the latter will be but it may be possible.

 I'm leaning towards adding `-mrelax-branch-checks' or similar GAS and LD 
options for upcoming binutils 2.28 so that people who need to run a mixed 
combination of tools or have legacy handcoded assembly code they fear to 
touch have a way to get away.  That will be some burden for some people 
I'm afraid, but the benefit is potentially problematic code will be 
identified in the default toolchain configuration right away.

 Ideally we'd optimise away branches which lead to orphan labels, as 
obviously there's as little point in executing them as there was in 
retaining the code path which has been removed from the label on.  There 
is still at least one case though where we cannot just remove the branch 
by itself as it stands as it's a part of a larger monolithic assembly 
block, i.e. the MIPS16 `casesi_internal_mips16_' insn which as I 
noted in the other submission cannot be split into smaller pieces without 
a further investigation as to why it causes too much code to be removed.

 And then there are examples like the assembly gotos I included as test 
cases where the label has to be retained by definition.  I'd expect such 
cases to be extremely rare in real code though.

  Maciej


Re: [PATCH] DWARF: make signedness explicit for enumerator const values

2016-11-14 Thread Mark Wielaard
On Mon, 2016-11-14 at 12:08 +0100, Pierre-Marie de Rodat wrote:
> Thank you for your answer!
> 
> On 11/10/2016 01:38 PM, Mark Wielaard wrote:
> > IMHO having an explicit DW_AT_type pointing at the base type with size
> > and encoding for the DW_TAG_enumerator_type is better for consumers than
> > having to try and interpret the DW_FORM used to encode the values.
> 
> I’m curious about why you think this alternative is better for 
> consumers: after all, they always have to interpret the DW_FORM anyway 
> in order to decode the DIE stream.

Right, this comes from having these untyped (and unsized) forms in the
constant class. And the constant class being used with and without
context about how to interpret the constant (is it a bit pattern or a
value). In this particular case the value is represented through a
DW_AT_const_value attribute which can also be represented in block form.
Without (type) context you also don't know anything about the size. You
can either choose a signed/unsigned form not giving the consumer a hint
about the size of the underlying constant value or one of the sized data
forms that don't give a hint about the value representation/signedness.
So in both cases the consumer loses without an actual (base) type
telling them how to interpret the constant.

If the type/context is known then for a consumer it is easy to just have
a read signed/unsigned value method for attributes that provide a
constant/value which doesn't care about the underlying form. That also
means the producer can choose the smallest representation of the data
without worrying that the consumer might misinterpret the value
representation by the specific form chosen.

Cheers,

Mark


Re: [PATCH] Don't use priority {cd}tors if not supported by a target (PR, gcov-profile/78086)

2016-11-14 Thread Martin Liška
PING^1

On 10/31/2016 01:13 PM, Martin Liška wrote:
> On 10/31/2016 11:07 AM, Rainer Orth wrote:
>> Hi Martin,
>>
>>> Using priority {cd}tors on a target that does not support that can cause
>>> failures (see the PR).
>>> Apart from that, I decided to use priority 100 for both gcov_init and
>>> gcov_exit functions as
>>> the reserved range includes priority 100. Moreover, I enhanced test-cases
>>> we have.
>>
>> just two nits:
>>
>> diff --git a/gcc/testsuite/g++.dg/gcov/pr16855-priority.C 
>> b/gcc/testsuite/g++.dg/gcov/pr16855-priority.C
>> new file mode 100644
>> index 0000000..7e39565
>> --- /dev/null
>> +++ b/gcc/testsuite/g++.dg/gcov/pr16855-priority.C
>> [...]
>> +static void __attribute__ ((constructor ((101)))) ctor_100 ()
>>
>> Should be called ctor_101 now.  Same for dtor_100 below.
>>
>>  Rainer
>>
> 
> Thanks for the note. Fixed in attached patch.
> 
> Martin
> 



Re: [PATCH, vec-tails] Support loop epilogue vectorization

2016-11-14 Thread Richard Biener
On Fri, 11 Nov 2016, Yuri Rumyantsev wrote:

> Richard,
> 
> I prepare updated 3 patch with passing additional argument to
> vect_analyze_loop as you proposed (untested).
> 
> You wrote:
> Btw, I wonder if you can produce a single patch containing just
> epilogue vectorization, that is combine patches 1-3 but rip out
> changes only needed by later patches?
> 
> Did you mean that I exclude all support for vectorization epilogues,
> i.e. exclude from 2-nd patch all non-related changes
> like
> 
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index 11863af..32011c1 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -1120,6 +1120,12 @@ new_loop_vec_info (struct loop *loop)
>LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
>LOOP_VINFO_PEELING_FOR_NITER (res) = false;
>LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
> +  LOOP_VINFO_CAN_BE_MASKED (res) = false;
> +  LOOP_VINFO_REQUIRED_MASKS (res) = 0;
> +  LOOP_VINFO_COMBINE_EPILOGUE (res) = false;
> +  LOOP_VINFO_MASK_EPILOGUE (res) = false;
> +  LOOP_VINFO_NEED_MASKING (res) = false;
> +  LOOP_VINFO_ORIG_LOOP_INFO (res) = NULL;

Yes.
 
> Did you mean also that new combined patch must be working patch, i.e.
> can be integrated without other patches?

Yes.

> Could you please look at updated patch?

Will do.

Thanks,
Richard.

> Thanks.
> Yuri.
> 
> 2016-11-10 15:36 GMT+03:00 Richard Biener :
> > On Thu, 10 Nov 2016, Richard Biener wrote:
> >
> >> On Tue, 8 Nov 2016, Yuri Rumyantsev wrote:
> >>
> >> > Richard,
> >> >
> >> > Here is updated 3 patch.
> >> >
> >> > I checked that all new tests related to epilogue vectorization passed 
> >> > with it.
> >> >
> >> > Your comments will be appreciated.
> >>
> >> A lot better now.  Instead of the ->aux dance I now prefer to
> >> pass the original loops loop_vinfo to vect_analyze_loop as
> >> optional argument (if non-NULL we analyze the epilogue of that
> >> loop_vinfo).  OTOH I remember we mainly use it to get at the
> >> original vectorization factor?  So we can pass down an (optional)
> >> forced vectorization factor as well?
> >
> > Btw, I wonder if you can produce a single patch containing just
> > epilogue vectorization, that is combine patches 1-3 but rip out
> > changes only needed by later patches?
> >
> > Thanks,
> > Richard.
> >
> >> Richard.
> >>
> >> > 2016-11-08 15:38 GMT+03:00 Richard Biener :
> >> > > On Thu, 3 Nov 2016, Yuri Rumyantsev wrote:
> >> > >
> >> > >> Hi Richard,
> >> > >>
> >> > >> I did not understand your last remark:
> >> > >>
> >> > >> > That is, here (and avoid the FOR_EACH_LOOP change):
> >> > >> >
> >> > >> > @@ -580,12 +586,21 @@ vectorize_loops (void)
> >> > >> >   && dump_enabled_p ())
> >> > >> >   dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
> >> > >> >"loop vectorized\n");
> >> > >> > -   vect_transform_loop (loop_vinfo);
> >> > >> > +   new_loop = vect_transform_loop (loop_vinfo);
> >> > >> > num_vectorized_loops++;
> >> > >> >/* Now that the loop has been vectorized, allow it to be 
> >> > >> > unrolled
> >> > >> >   etc.  */
> >> > >> >  loop->force_vectorize = false;
> >> > >> >
> >> > >> > +   /* Add new loop to a processing queue.  To make it easier
> >> > >> > +  to match loop and its epilogue vectorization in dumps
> >> > >> > +  put new loop as the next loop to process.  */
> >> > >> > +   if (new_loop)
> >> > >> > + {
> >> > >> > +   loops.safe_insert (i + 1, new_loop->num);
> >> > >> > +   vect_loops_num = number_of_loops (cfun);
> >> > >> > + }
> >> > >> >
> >> > >> > simply dispatch to a vectorize_epilogue (loop_vinfo, new_loop)
> >> > >> > function which will set up stuff properly (and also perform
> >> > >> > the if-conversion of the epilogue there).
> >> > >> >
> >> > >> > That said, if we can get in non-masked epilogue vectorization
> >> > >> > separately that would be great.
> >> > >>
> >> > >> Could you please clarify your proposal.
> >> > >
> >> > > When a loop was vectorized set things up to immediately vectorize
> >> > > its epilogue, avoiding changing the loop iteration and avoiding
> >> > > the re-use of ->aux.
> >> > >
> >> > > Richard.
> >> > >
> >> > >> Thanks.
> >> > >> Yuri.
> >> > >>
> >> > >> 2016-11-02 15:27 GMT+03:00 Richard Biener :
> >> > >> > On Tue, 1 Nov 2016, Yuri Rumyantsev wrote:
> >> > >> >
> >> > >> >> Hi All,
> >> > >> >>
> >> > >> >> I re-send all patches sent by Ilya earlier for review which support
> >> > >> >> vectorization of loop epilogues and loops with low trip count. We
> >> > >> >> assume that the only patch - vec-tails-07-combine-tail.patch - was 
> >> > >> >> not
> >> > >> >> approved by Jeff.
> >> > >> >>
> >> > >> >> I did re-base of all patches and performed bootstrapping and
> >> > >> >> regression testing that did not show any new failures. Also all
> >> > >> >> changes related to new vect_do_peeling algorithm have been changed
> >> > >> >> ac

Re: [PATCH, vec-tails] Support loop epilogue vectorization

2016-11-14 Thread Richard Biener
On Fri, 11 Nov 2016, Yuri Rumyantsev wrote:

> Richard,
> 
> Here is fixed version of updated patch 3.
> 
> Any comments will be appreciated.

Looks good apart from

+  if (epilogue)
+{
+  epilogue->force_vectorize = loop->force_vectorize;
+  epilogue->safelen = loop->safelen;
+  epilogue->dont_vectorize = false;
+
+  /* We may need to if-convert epilogue to vectorize it.  */
+  if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
+   tree_if_conversion (epilogue);
+
+  gcc_assert (!epilogue->aux);
+  epilogue->aux = loop_vinfo;

where the last two lines should now no longer be necessary?

Thanks,
Richard.

> Thanks.
> Yuri.
> 
> 2016-11-11 17:15 GMT+03:00 Yuri Rumyantsev :
> > Richard,
> >
> > Sorry for confusion but my updated patch  does not work properly, so I
> > need to fix it.
> >
> > Yuri.
> >
> > 2016-11-11 14:15 GMT+03:00 Yuri Rumyantsev :
> >> Richard,
> >>
> >> I prepare updated 3 patch with passing additional argument to
> >> vect_analyze_loop as you proposed (untested).
> >>
> >> You wrote:
> >> Btw, I wonder if you can produce a single patch containing just
> >> epilogue vectorization, that is combine patches 1-3 but rip out
> >> changes only needed by later patches?
> >>
> >> Did you mean that I exclude all support for vectorization epilogues,
> >> i.e. exclude from 2-nd patch all non-related changes
> >> like
> >>
> >> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> >> index 11863af..32011c1 100644
> >> --- a/gcc/tree-vect-loop.c
> >> +++ b/gcc/tree-vect-loop.c
> >> @@ -1120,6 +1120,12 @@ new_loop_vec_info (struct loop *loop)
> >>LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
> >>LOOP_VINFO_PEELING_FOR_NITER (res) = false;
> >>LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
> >> +  LOOP_VINFO_CAN_BE_MASKED (res) = false;
> >> +  LOOP_VINFO_REQUIRED_MASKS (res) = 0;
> >> +  LOOP_VINFO_COMBINE_EPILOGUE (res) = false;
> >> +  LOOP_VINFO_MASK_EPILOGUE (res) = false;
> >> +  LOOP_VINFO_NEED_MASKING (res) = false;
> >> +  LOOP_VINFO_ORIG_LOOP_INFO (res) = NULL;
> >>
> >> Did you mean also that new combined patch must be working patch, i.e.
> >> can be integrated without other patches?
> >>
> >> Could you please look at updated patch?
> >>
> >> Thanks.
> >> Yuri.
> >>
> >> 2016-11-10 15:36 GMT+03:00 Richard Biener :
> >>> On Thu, 10 Nov 2016, Richard Biener wrote:
> >>>
>  On Tue, 8 Nov 2016, Yuri Rumyantsev wrote:
> 
>  > Richard,
>  >
>  > Here is updated 3 patch.
>  >
>  > I checked that all new tests related to epilogue vectorization passed 
>  > with it.
>  >
>  > Your comments will be appreciated.
> 
>  A lot better now.  Instead of the ->aux dance I now prefer to
>  pass the original loops loop_vinfo to vect_analyze_loop as
>  optional argument (if non-NULL we analyze the epilogue of that
>  loop_vinfo).  OTOH I remember we mainly use it to get at the
>  original vectorization factor?  So we can pass down an (optional)
>  forced vectorization factor as well?
> >>>
> >>> Btw, I wonder if you can produce a single patch containing just
> >>> epilogue vectorization, that is combine patches 1-3 but rip out
> >>> changes only needed by later patches?
> >>>
> >>> Thanks,
> >>> Richard.
> >>>
>  Richard.
> 
>  > 2016-11-08 15:38 GMT+03:00 Richard Biener :
>  > > On Thu, 3 Nov 2016, Yuri Rumyantsev wrote:
>  > >
>  > >> Hi Richard,
>  > >>
>  > >> I did not understand your last remark:
>  > >>
>  > >> > That is, here (and avoid the FOR_EACH_LOOP change):
>  > >> >
>  > >> > @@ -580,12 +586,21 @@ vectorize_loops (void)
>  > >> >   && dump_enabled_p ())
>  > >> >   dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
>  > >> >"loop vectorized\n");
>  > >> > -   vect_transform_loop (loop_vinfo);
>  > >> > +   new_loop = vect_transform_loop (loop_vinfo);
>  > >> > num_vectorized_loops++;
>  > >> >/* Now that the loop has been vectorized, allow it to be 
>  > >> > unrolled
>  > >> >   etc.  */
>  > >> >  loop->force_vectorize = false;
>  > >> >
>  > >> > +   /* Add new loop to a processing queue.  To make it easier
>  > >> > +  to match loop and its epilogue vectorization in dumps
>  > >> > +  put new loop as the next loop to process.  */
>  > >> > +   if (new_loop)
>  > >> > + {
>  > >> > +   loops.safe_insert (i + 1, new_loop->num);
>  > >> > +   vect_loops_num = number_of_loops (cfun);
>  > >> > + }
>  > >> >
>  > >> > simply dispatch to a vectorize_epilogue (loop_vinfo, new_loop)
>  > >> > function which will set up stuff properly (and also perform
>  > >> > the if-conversion of the epilogue there).
>  > >> >
>  > >> > That said, if we can get in non-masked epilogue vectorization
>  > >> > separately that woul

Re: [PATCH][AArch64 - v3] Simplify eh_return implementation

2016-11-14 Thread Wilco Dijkstra
ping

From: Wilco Dijkstra
Sent: 02 November 2016 16:49
To: Ramana Radhakrishnan; GCC Patches
Cc: nd
Subject: Re: [PATCH][AArch64 - v3] Simplify eh_return implementation
    

    

ping


From: Wilco Dijkstra
Sent: 02 September 2016 12:31
To: Ramana Radhakrishnan; GCC Patches
Cc: nd
Subject: Re: [PATCH][AArch64 - v3] Simplify eh_return implementation
    
Ramana Radhakrishnan wrote:
> Can you please file a PR for this and add some testcases ?  This sounds like 
> a serious enough problem that needs to be looked at probably going back since 
> the dawn of time.

I've created PR77455. Updated patch below:

This patch simplifies the handling of the EH return value.  We force the use of 
the
frame pointer so the return location is always at FP + 8.  This means we can 
emit
a simple volatile access in EH_RETURN_HANDLER_RTX without needing md
patterns, splitters and frame offset calculations.  The new implementation also
fixes various bugs in aarch64_final_eh_return_addr, which does not work with
-fomit-frame-pointer, alloca or outgoing arguments.

Bootstrap OK, GCC Regression OK, OK for trunk? Would it be useful to backport
this to GCC6.x?

ChangeLog:

2016-09-02  Wilco Dijkstra  

    PR77455
gcc/
    * config/aarch64/aarch64.md (eh_return): Remove pattern and splitter.
    * config/aarch64/aarch64.h (AARCH64_EH_STACKADJ_REGNUM): Remove.
    (EH_RETURN_HANDLER_RTX): New define.
    * config/aarch64/aarch64.c (aarch64_frame_pointer_required):
    Force frame pointer in EH return functions.
    (aarch64_expand_epilogue): Add barrier for eh_return.
    (aarch64_final_eh_return_addr): Remove.
    (aarch64_eh_return_handler_rtx): New function.
    * config/aarch64/aarch64-protos.h (aarch64_final_eh_return_addr):
    Remove.
    (aarch64_eh_return_handler_rtx): New prototype.

testsuite/
    * gcc.target/aarch64/eh_return.c: New test.
--
diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 
3cdd69b8af1089a839e5d45cda94bc70a15cd777..327c0a97f6f687604afef249b79ac22628418070
 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -358,7 +358,7 @@ int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
 int aarch64_hard_regno_nregs (unsigned, machine_mode);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
 int aarch64_vec_fpconst_pow_of_2 (rtx);
-rtx aarch64_final_eh_return_addr (void);
+rtx aarch64_eh_return_handler_rtx (void);
 rtx aarch64_mask_from_zextract_ops (rtx, rtx);
 const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr (int, rtx);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 
003fec87e41db618570663f28cc2387a87e8252a..fa81e4b853daf08842955288861ec7e7acca
 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -400,9 +400,9 @@ extern unsigned aarch64_architecture_version;
 #define ASM_DECLARE_FUNCTION_NAME(STR, NAME, DECL)  \
   aarch64_declare_function_name (STR, NAME, DECL)
 
-/* The register that holds the return address in exception handlers.  */
-#define AARCH64_EH_STACKADJ_REGNUM (R0_REGNUM + 4)
-#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, AARCH64_EH_STACKADJ_REGNUM)
+/* For EH returns X4 contains the stack adjustment.  */
+#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R4_REGNUM)
+#define EH_RETURN_HANDLER_RTX  aarch64_eh_return_handler_rtx ()
 
 /* Don't use __builtin_setjmp until we've defined it.  */
 #undef DONT_USE_BUILTIN_SETJMP
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
e742c19d76e6c62117aa62a990b9c2945aa06b74..f07d771ea343803e054e03f59c8c1efb698bf474
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2739,6 +2739,10 @@ aarch64_frame_pointer_required (void)
   && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
 return true;
 
+  /* Force a frame pointer for EH returns so the return address is at FP+8.  */
+  if (crtl->calls_eh_return)
+    return true;
+
   return false;
 }
 
@@ -3298,7 +3302,8 @@ aarch64_expand_epilogue (bool for_sibcall)
  + cfun->machine->frame.saved_varargs_size) != 0;
 
   /* Emit a barrier to prevent loads from a deallocated stack.  */
-  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca)
+  if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca
+  || crtl->calls_eh_return)
 {
   emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
   need_barrier_p = false;
@@ -3366,52 +3371,15 @@ aarch64_expand_epilogue (bool for_sibcall)
 emit_jump_insn (ret_rtx);
 }
 
-/* Return the place to copy the exception unwinding return address to.
-   This will probably be a stack slot, but could (in theory be the
-   return register).  */
+/* Implement EH_RETURN_HANDLER_RTX.  The return address is stored at FP + 8.
+   The access needs to be volatile to prevent it from being removed.  */
 rtx
-aarch6

Re: [PATCH][AArch64] Improve SHA1 scheduling

2016-11-14 Thread Wilco Dijkstra

ping


From: Wilco Dijkstra
Sent: 25 October 2016 18:08
To: GCC Patches
Cc: nd
Subject: [PATCH][AArch64] Improve SHA1 scheduling
    
SHA1H instructions may be scheduled after a SHA1C instruction
that uses the same input register.  However SHA1C updates its input,
so if SHA1H is scheduled after it, it requires an extra move.
Increase the priority of SHA1H to ensure it gets scheduled
earlier, avoiding the move.

Is this something the generic scheduler could do automatically for
instructions with RMW operands?

Passes bootstrap & regress. OK for commit?

ChangeLog:
2016-10-25  Wilco Dijkstra  

    * config/aarch64/aarch64.c (aarch64_sched_adjust_priority)
    New function.
    (TARGET_SCHED_ADJUST_PRIORITY): Define target hook.
--
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
9b2f9cb19343828dc39e9950ebbefe941521942a..2b25bd1bdd6f4e7737f8e04c3b3684cdff6c4b80
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -13668,6 +13668,26 @@ aarch64_sched_fusion_priority (rtx_insn *insn, int 
max_pri,
   return;
 }
 
+/* Implement the TARGET_SCHED_ADJUST_PRIORITY hook.
+   Adjust priority of sha1h instructions so they are scheduled before
+   other SHA1 instructions.  */
+
+static int
+aarch64_sched_adjust_priority (rtx_insn *insn, int priority)
+{
+  rtx x = PATTERN (insn);
+
+  if (GET_CODE (x) == SET)
+    {
+  x = SET_SRC (x);
+
+  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SHA1H)
+   return priority + 10;
+    }
+
+  return priority;
+}
+
 /* Given OPERANDS of consecutive load/store, check if we can merge
    them into ldp/stp.  LOAD is true if they are load instructions.
    MODE is the mode of memory operands.  */
@@ -14431,6 +14451,9 @@ aarch64_optab_supported_p (int op, machine_mode mode1, 
machine_mode,
 #undef TARGET_CAN_USE_DOLOOP_P
 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
 
+#undef TARGET_SCHED_ADJUST_PRIORITY
+#define TARGET_SCHED_ADJUST_PRIORITY aarch64_sched_adjust_priority
+
 #undef TARGET_SCHED_MACRO_FUSION_P
 #define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
 

    

Re: [PATCH v2][AArch64] Fix symbol offset limit

2016-11-14 Thread Wilco Dijkstra

     ping

From: Wilco Dijkstra
Sent: 12 September 2016 15:50
To: Richard Earnshaw; GCC Patches
Cc: nd
Subject: Re: [PATCH v2][AArch64] Fix symbol offset limit
    
Wilco wrote:    
> The original example is from GCC itself, the fixed_regs array is small but 
> due to
> optimization we can end up with &fixed_regs + 0xffffffff.

We could also check the bounds of each symbol if they exist, like the patch 
below.


In aarch64_classify_symbol symbols are allowed full-range offsets on 
relocations.
This means the offset can use all of the +/-4GB offset, leaving no offset 
available
for the symbol itself.  This results in relocation overflow and link-time errors
for simple expressions like &global_char + 0xffffff00.

To avoid this, limit the offset to +/-1GB so that the symbol needs to be within 
a
3GB offset from its references.  For the tiny code model use a 64KB offset, 
allowing
most of the 1MB range for code/data between the symbol and its references.
For symbols with a defined size, limit the offset to be within the size of the 
symbol.

ChangeLog:
2016-09-12  Wilco Dijkstra  

    gcc/
    * config/aarch64/aarch64.c (aarch64_classify_symbol):
    Apply reasonable limit to symbol offsets.

    testsuite/
    * gcc.target/aarch64/symbol-range.c (foo): Set new limit.
    * gcc.target/aarch64/symbol-range-tiny.c (foo): Likewise.

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
385bd560fb12cd5d404e6ddb2f01edf1fe72d729..275a828ac9e6e9b8187380c1b602ffb1b2bcfb21
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9351,6 +9351,8 @@ aarch64_classify_symbol (rtx x, rtx offset)
   if (aarch64_tls_symbol_p (x))
 return aarch64_classify_tls_symbol (x);
 
+  const_tree decl = SYMBOL_REF_DECL (x);
+
   switch (aarch64_cmodel)
 {
 case AARCH64_CMODEL_TINY:
@@ -9359,25 +9361,45 @@ aarch64_classify_symbol (rtx x, rtx offset)
  we have no way of knowing the address of symbol at compile time
  so we can't accurately say if the distance between the PC and
  symbol + offset is outside the addressible range of +/-1M in the
-    TINY code model.  So we rely on images not being greater than
-    1M and cap the offset at 1M and anything beyond 1M will have to
-    be loaded using an alternative mechanism.  Furthermore if the
-    symbol is a weak reference to something that isn't known to
-    resolve to a symbol in this module, then force to memory.  */
+    TINY code model.  So we limit the maximum offset to +/-64KB and
+    assume the offset to the symbol is not larger than +/-(1M - 64KB).
+    Furthermore force to memory if the symbol is a weak reference to
+    something that doesn't resolve to a symbol in this module.  */
   if ((SYMBOL_REF_WEAK (x)
    && !aarch64_symbol_binds_local_p (x))
- || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
+ || !IN_RANGE (INTVAL (offset), -0x1, 0x1))
 return SYMBOL_FORCE_TO_MEM;
+
+ /* Limit offset to within the size of a declaration if available.  */
+ if (decl && DECL_P (decl))
+   {
+ const_tree decl_size = DECL_SIZE (decl);
+
+ if (decl_size
+ && !IN_RANGE (INTVAL (offset), 0, tree_to_shwi (decl_size)))
+   return SYMBOL_FORCE_TO_MEM;
+   }
+
   return SYMBOL_TINY_ABSOLUTE;
 
 case AARCH64_CMODEL_SMALL:
   /* Same reasoning as the tiny code model, but the offset cap here is
-    4G.  */
+    1G, allowing +/-3G for the offset to the symbol.  */
   if ((SYMBOL_REF_WEAK (x)
    && !aarch64_symbol_binds_local_p (x))
- || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263),
-   HOST_WIDE_INT_C (4294967264)))
+ || !IN_RANGE (INTVAL (offset), -0x4000, 0x4000))
 return SYMBOL_FORCE_TO_MEM;
+
+ /* Limit offset to within the size of a declaration if available.  */
+ if (decl && DECL_P (decl))
+   {
+ const_tree decl_size = DECL_SIZE (decl);
+
+ if (decl_size
+ && !IN_RANGE (INTVAL (offset), 0, tree_to_shwi (decl_size)))
+   return SYMBOL_FORCE_TO_MEM;
+   }
+
   return SYMBOL_SMALL_ABSOLUTE;
 
 case AARCH64_CMODEL_TINY_PIC:
diff --git a/gcc/testsuite/gcc.target/aarch64/symbol-range-tiny.c 
b/gcc/testsuite/gcc.target/aarch64/symbol-range-tiny.c
index 
d7e46b059e41f2672b3a1da5506fa8944e752e01..d399a3637ed834ddc4bb429594c4ec229b5c2ea8
 100644
--- a/gcc/testsuite/gcc.target/aarch64/symbol-range-tiny.c
+++ b/gcc/testsuite/gcc.target/aarch64/symbol-range-tiny.c
@@ -1,12 +1,12 @@
-/* { dg-do compile } */
+/* { dg-do link } */
 /* { dg-options "-O3 -save-temps -mcmodel=tiny" } */
 
-int fixed_regs[0x0

Re: [RFC][PATCH][AArch64] Cleanup frame pointer usage

2016-11-14 Thread Wilco Dijkstra
ping



From: Wilco Dijkstra
Sent: 31 October 2016 18:29
To: GCC Patches
Cc: nd
Subject: [RFC][PATCH][AArch64] Cleanup frame pointer usage
    
This patch cleans up all code related to the frame pointer.  On AArch64 we
emit a frame chain even in cases where the frame pointer is not required.
So make this explicit by introducing a boolean emit_frame_chain in
aarch64_frame record.

When the frame pointer is enabled but not strictly required (eg. no use of
alloca), we emit a frame chain in non-leaf functions, but continue to use the
stack pointer to access locals.  This results in smaller code and unwind info.

Also simplify the complex logic in aarch64_override_options_after_change_1
and compute whether the frame chain is required in aarch64_layout_frame
instead.  As a result aarch64_frame_pointer_required is now redundant and
aarch64_can_eliminate can be greatly simplified.

Finally convert all callee save/restore functions to use gen_frame_mem.

Bootstrap OK. Any comments?

ChangeLog:
2016-10-31  Wilco Dijkstra  

    gcc/
    * config/aarch64/aarch64.h (aarch64_frame):
 Add emit_frame_chain boolean.
    * config/aarch64/aarch64.c (aarch64_frame_pointer_required)
    Remove.
    (aarch64_layout_frame): Initialise emit_frame_chain.
    (aarch64_pushwb_single_reg): Use gen_frame_mem.
    (aarch64_pop_regs): Likewise.
    (aarch64_gen_load_pair): Likewise.
    (aarch64_save_callee_saves): Likewise.
    (aarch64_restore_callee_saves): Likewise.
    (aarch64_expand_prologue): Use emit_frame_chain.
    (aarch64_can_eliminate): Simplify. When FP needed or outgoing
    arguments are large, eliminate to FP, otherwise SP.
    (aarch64_override_options_after_change_1): Simplify.
    (TARGET_FRAME_POINTER_REQUIRED): Remove define.

--

diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 
fa81e4b853daf08842955288861ec7e7acca..6e32dc9f6f171dde0c182fdd7857230251f71712
 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -583,6 +583,9 @@ struct GTY (()) aarch64_frame
   /* The size of the stack adjustment after saving callee-saves.  */
   HOST_WIDE_INT final_adjust;
 
+  /* Store FP,LR and setup a frame pointer.  */
+  bool emit_frame_chain;
+
   unsigned wb_candidate1;
   unsigned wb_candidate2;
 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 
f07d771ea343803e054e03f59c8c1efb698bf474..6c06ac18d16f8afa7ee1cc5e8530e285a60e2b0f
 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2728,24 +2728,6 @@ aarch64_output_probe_stack_range (rtx reg1, rtx reg2)
   return "";
 }
 
-static bool
-aarch64_frame_pointer_required (void)
-{
-  /* In aarch64_override_options_after_change
- flag_omit_leaf_frame_pointer turns off the frame pointer by
- default.  Turn it back on now if we've not got a leaf
- function.  */
-  if (flag_omit_leaf_frame_pointer
-  && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
-    return true;
-
-  /* Force a frame pointer for EH returns so the return address is at FP+8.  */
-  if (crtl->calls_eh_return)
-    return true;
-
-  return false;
-}
-
 /* Mark the registers that need to be saved by the callee and calculate
    the size of the callee-saved registers area and frame record (both FP
    and LR may be omitted).  */
@@ -2758,6 +2740,18 @@ aarch64_layout_frame (void)
   if (reload_completed && cfun->machine->frame.laid_out)
 return;
 
+  /* Force a frame chain for EH returns so the return address is at FP+8.  */
+  cfun->machine->frame.emit_frame_chain
+    = frame_pointer_needed || crtl->calls_eh_return;
+
+  /* Emit a frame chain if the frame pointer is enabled.
+ If -momit-leaf-frame-pointer is used, do not use a frame chain
+ in leaf functions which do not use LR.  */
+  if (flag_omit_frame_pointer == 2
+  && !(flag_omit_leaf_frame_pointer && crtl->is_leaf
+  && !df_regs_ever_live_p (LR_REGNUM)))
+    cfun->machine->frame.emit_frame_chain = true;
+
 #define SLOT_NOT_REQUIRED (-2)
 #define SLOT_REQUIRED (-1)
 
@@ -2789,7 +2783,7 @@ aarch64_layout_frame (void)
 && !call_used_regs[regno])
   cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
 
-  if (frame_pointer_needed)
+  if (cfun->machine->frame.emit_frame_chain)
 {
   /* FP and LR are placed in the linkage record.  */
   cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
@@ -2937,7 +2931,7 @@ aarch64_pushwb_single_reg (machine_mode mode, unsigned 
regno,
   reg = gen_rtx_REG (mode, regno);
   mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
 plus_constant (Pmode, base_rtx, -adjustment));
-  mem = gen_rtx_MEM (mode, mem);
+  mem = gen_frame_mem (mode, mem);
 
   insn = emit_move_insn (mem, reg);
   RTX_FRAME_RELATED_P (insn) = 1;
@@ -3011,7 +3005,7 @@ aarch64_pop_regs (unsigned regno1, unsigned regno2, 
HOST_WIDE_INT adjustment,
 {
   rtx mem = plus_constant (Pmode,

Re: [PR target/78213] Do not ICE on non-empty -fself-test

2016-11-14 Thread Bernd Schmidt

On 11/11/2016 06:10 PM, Aldy Hernandez wrote:

The problem in this PR is that -fself-test is being run on a non empty
source file.  This causes init_emit() to run, which sets:

REG_POINTER (virtual_incoming_args_rtx) = 1;

Setting REG_POINTER on the virtual incoming args, causes /f to be
printed on some RTL dumps, causing the -fself-test machinery to fail at
matching the expected value.


How about always running init_emit and testing for the correct output?


Bernd


Fwd: failure notice

2016-11-14 Thread Jack Howarth
-- Forwarded message --
From: 
Date: Mon, Nov 14, 2016 at 8:15 AM
Subject: failure notice
To: howarth.at@gmail.com


Hi. This is the qmail-send program at sourceware.org.
I'm afraid I wasn't able to deliver your message to the following addresses.
This is a permanent error; I've given up. Sorry it didn't work out.

:
Invalid mime type "text/html" detected in message text or
attachment.  Please send plain text messages only.
See http://sourceware.org/lists.html#sourceware-list-info for more information.
Contact gcc-patches-ow...@gcc.gnu.org if you have questions about this. (#5.7.2)

--- Below this line is a copy of the message.

Return-Path: 
Received: (qmail 26838 invoked by uid 89); 14 Nov 2016 13:15:50 -
Authentication-Results: sourceware.org; auth=none
X-Virus-Checked: by ClamAV 0.99.2 on sourceware.org
X-Virus-Found: No
X-Spam-Flag: YES
X-Spam-SWARE-Status: Yes, score=5.5 required=5.0
tests=BAYES_40,FREEMAIL_FROM,HTML_MESSAGE,RCVD_IN_DNSWL_NONE,RCVD_IN_SORBS_SPAM,SPF_PASS
autolearn=no version=3.3.2 spammy=Care, 73, 27336, 7.3
X-Spam-Status: Yes, score=5.5 required=5.0
tests=BAYES_40,FREEMAIL_FROM,HTML_MESSAGE,RCVD_IN_DNSWL_NONE,RCVD_IN_SORBS_SPAM,SPF_PASS
autolearn=no version=3.3.2
X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on sourceware.org
X-Spam-Level: *
X-HELO: mail-yw0-f182.google.com
Received: from mail-yw0-f182.google.com (HELO
mail-yw0-f182.google.com) (209.85.161.182)
 by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Mon,
14 Nov 2016 13:15:39 +
Received: by mail-yw0-f182.google.com with SMTP id a10so57164557ywa.3
for ; Mon, 14 Nov 2016 05:15:39 -0800 (PST)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=gmail.com; s=20120113;
h=mime-version:in-reply-to:references:from:date:message-id:subject:to
 :cc;
bh=ut9+GijOTPsFgXgpyv5pRUOLVxPD0mjqwq3dgw9nVcc=;
b=rnq9uE6RoNcPx/romZSBYrNuw2Z+26adCJWz2sZ+CBzYASOPPo+78xPFGbZcyCbVdb
 kDyv78RNQk33JxySlxjEmg8y6Bz0dB/QJlfrHf1VtGloVAbTpVuIsYS7ouZbJx3PZq9F
 lUyDBT0wUzNethLJaSKUaGVCYUetgurAyI6XB0T9CeiKM5qTv+ih/EM7Nr19v1zS1Hpz
 8sndaqnzhh7ySKfWyB/AwhBrae1By/Iui55/pSwpoMFY2HP0ZmLEJ0Iahz2dGerVhtcr
 hKzYP38ysWs16ACgxYO7Ro8CKG465Z9Er7Q2jNHvgimz3jsoI7/KtIVCptRn5nTDotDU
 2CNg==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
d=1e100.net; s=20130820;
h=x-gm-message-state:mime-version:in-reply-to:references:from:date
 :message-id:subject:to:cc;
bh=ut9+GijOTPsFgXgpyv5pRUOLVxPD0mjqwq3dgw9nVcc=;
b=jjWHtYqA+8b7cqwAbEKsfErUsvN6OoBdn46xqgDyxI18HReNt/iFa2GZbyhDcg2Ao3
 //e4mfm/CSHAfi0yUTkxuYk7h1GVodTbfFZD7ifx00zy9AbB0F0+EVf+fOJAhNOhzBRl
 QwGyokx7YSxoJybduyTr8TEvRZQCRfI2joVCbmj5SxqFiLT0O6PgrvIGwAr3ofIQvpPy
 9AylyMxG4C9g8V+/w98kTYx/vtsAefnXFntyjoUjkX9Irub7yVyDI6DP3CmaKOVJXjJ0
 jpY+SaBYzQxuKP8CY5K+tLdUWPOew8s/Yt0iPTX3Yc9ci7qeklHCdvnFD2/V2QnHltCW
 C9zg==
X-Gm-Message-State:
ABUngvfUQDaYa/k+Z0TyCk8hv9XVzgk7w8548KL3RmaaLIB4I+909ZmhJFsXKI/GVQASQUlEbGTOs1KHYaaxtQ==
X-Received: by 10.202.245.74 with SMTP id t71mr8074538oih.37.1479129337927;
 Mon, 14 Nov 2016 05:15:37 -0800 (PST)
MIME-Version: 1.0
Received: by 10.202.225.212 with HTTP; Mon, 14 Nov 2016 05:15:36 -0800 (PST)
In-Reply-To: 

References: 

 

From: Jack Howarth 
Date: Mon, 14 Nov 2016 08:15:36 -0500
Message-ID: 
Subject: Re: [fixincludes] Fix macOS 10.12  and
  (PR sanitizer/78267)
To: Rainer Orth 
Cc: GCC Patches , Bruce Korb 
Content-Type: multipart/mixed; boundary=001a113d2bf2c3bb0e054142a2bd

--001a113d2bf2c3bb0e054142a2bd
Content-Type: multipart/alternative; boundary=001a113d2bf2c3bb08054142a2bb

--001a113d2bf2c3bb08054142a2bb
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: quoted-printable

Rainer,
 In case it helps, attached is a diff of the os/trace.h header from OS
X 10.11 vs that from macOS 10.12. It looks like the context of the
changes that you are trying to apply into are different between the two OS
releases and are the cause of the build failures on darwin15.
 Jack


On Sun, Nov 13, 2016 at 1:19 PM, Jack Howarth 
wrote:

>
>
> On Sun, Nov 13, 2016 at 5:53 AM, Rainer Orth 
> wrote:
>
>> Hi Jack,
>>
>> > On darwin15, the proposed patch is insufficient to restore the bootstr=
ap
>> > (after running genfixes in the fixincludes directory) unless I also
>> apply
>> > the previously proposed change...
>>
>> no wonder: it's only been tested on darwin16.  Care to explain what
>> error you're seeing?
>>
>>
> The failure that I see on darwin15 using your proposed patches and
> executing genfixes in fixincludes  before the build is...
>
> libtool: compile:  /sw/src/fink.build/gcc7-7.0.0-1/darwin_objdir/./gcc/xg=
cc
> -shared-libgcc -B/sw/src/fink.build/gcc7-7.0.0-1/darwin_objdir/./gcc
> -nostdinc++ -L/sw/src/fink.build/gcc7-7.0.0-1/darwin_objdir/x86_64-
> apple-darwin15.6.0/libstdc++-v3/src -L/sw/src/fink.build/gcc7-7.0.
> 0-1/darwin_objdir/x86

Re: [PR target/78213] Do not ICE on non-empty -fself-test

2016-11-14 Thread Jakub Jelinek
On Mon, Nov 14, 2016 at 02:18:02PM +0100, Bernd Schmidt wrote:
> On 11/11/2016 06:10 PM, Aldy Hernandez wrote:
> >The problem in this PR is that -fself-test is being run on a non empty
> >source file.  This causes init_emit() to run, which sets:
> >
> >REG_POINTER (virtual_incoming_args_rtx) = 1;
> >
> >Setting REG_POINTER on the virtual incoming args, causes /f to be
> >printed on some RTL dumps, causing the -fself-test machinery to fail at
> >matching the expected value.
> 
> How about always running init_emit and testing for the correct output?

You mean only if -fself-test, right?

Jakub


Re: [PR target/78213] Do not ICE on non-empty -fself-test

2016-11-14 Thread Bernd Schmidt

On 11/14/2016 02:20 PM, Jakub Jelinek wrote:

On Mon, Nov 14, 2016 at 02:18:02PM +0100, Bernd Schmidt wrote:

On 11/11/2016 06:10 PM, Aldy Hernandez wrote:

The problem in this PR is that -fself-test is being run on a non empty
source file.  This causes init_emit() to run, which sets:

   REG_POINTER (virtual_incoming_args_rtx) = 1;

Setting REG_POINTER on the virtual incoming args, causes /f to be
printed on some RTL dumps, causing the -fself-test machinery to fail at
matching the expected value.


How about always running init_emit and testing for the correct output?


You mean only if -fself-test, right?


I guess.


Bernd



Re: [PATCH v2] aarch64: Add split-stack initial support

2016-11-14 Thread Adhemerval Zanella
Ping.

On 07/11/2016 16:59, Adhemerval Zanella wrote:
> 
> 
> On 14/10/2016 15:59, Wilco Dijkstra wrote:
>> Hi,
>>
> 
> Thanks for the thoughtful review and sorry for late response. 
> 
>>> Split-stack prologue on function entry is as follow (this goes before the
>>> usual function prologue):
>>
>>> mrsx9, tpidr_el0
>>> movx10, -
>>
>> As Jiong already remarked, the nop won't work. Do we know the maximum 
>> adjustment
>> that the linker is allowed to make? If so, and we can limit the adjustment 
>> to 16MB in
>> most cases, emitting 2 subtracts is best. Larger offset need mov/movk/sub 
>> but that
>> should be extremely rare.
> 
> There is no limit afaik on gold split stack allocation handling,
> and I think one could be added for each backend (in the method
> override required to implement it).
> 
> In fact it is not really required to tie the nop generation with the
> instruction generated by 'aarch64_internal_mov_immediate', it is
> just a matter to simplify linker code.  
> 
> And although 16MB should be rare, the nilptr2.go test allocates 134217824
> so this test fails with this low stack limit.  I am not sure how well
> is the stack usage on 'go', but I think we should at least support
> current testcase scenario.  So for current iteration I kept my
> current approach, but I am open to suggestions.
> 
> 
>>
>>> nop/movk
>>
>>> add x10, sp, x10
>>> ldr x9, [x9, 16]
>>
>> Is there any need to detect underflow of x10 or is there a guarantee that 
>> stacks are
>> never allocated in the low 2GB (given the maximum adjustment is 2GB)? It's 
>> safe
>> to do a signed comparison.
> 
> I do not think so; at least none of the current backends that implement
> split stack do so.
> 
>>
>>> cmp x10, x9
>>> b.cs enough
>>
>> Why save/restore x30 and the call x30+8 trick when we could pass the
>> continuation address and use a tailcall? That also avoids emitting extra 
>> unwind info.
>>
>>> stpx30, [sp, -16]
>>> bl __morestack
>>> ldpx30, [sp], 16
>>> ret
>>
>> This part doesn't make any sense - both x28 and carry flag as an input, and 
>> spread
>> across the prolog - why???
>>
>>> enough:
>>> mov x10, sp
>>  [prolog]
>>> b.cscontinue
>>> mov x10, x28
>> continue:
>>  [rest of function]
>>
>> Why not do this?
>>
>> function:
>>  mrs x9, tpidr_el0
>>  sub x10, sp, N & 0xfff000
>>  sub x10, x10, N & 0xfff
>>  ldr x9, [x9, 16]
>>  adr x12, main_fn_entry
>>  mov x11, sp   [if function has stacked arguments]
>>  cmp x10, x9
>>  b.ge main_fn_entry
>>  b __morestack
>> main_fn_entry: [x11 is argument pointer]
>>  [prolog]
>>  [rest of function]
>>
>> In __morestack you need to save x8 as well (another argument register!) and 
>> x12 (the 
>> continuation address). After returning from the call x8 doesn't need to be 
>> preserved.
> 
> Indeed this strategy is way better and I adjusted the code to follow it.
> The only change is I am using a:
> 
>   [...]
>   cmp x9, x10
>   b.lt main_fn_entry
>   b   __morestack.
>   [...]
> 
> So I can issue a 'cmp , 0' on __morestack to indicate
> the function was called.
> 
>>
>> There are several issues with unwinding in __morestack. x28 is not described 
>> as a callee-save
>> so will be corrupted if unwinding across a __morestack call. This won't 
>> unwind correctly after
>> the ldp as the unwinder will use the restored frame pointer to try to 
>> restore x29/x30:
>>
>> +ldp x29, x30, [x28, STACKFRAME_BASE]
>> +ldr x28, [x28, STACKFRAME_BASE + 80]
>> +
>> +.cfi_remember_state
>> +.cfi_restore 30
>> +.cfi_restore 29
>> +.cfi_def_cfa 31, 0
> 
> Indeed, it misses x28 save/restore. I think I have added the missing bits, 
> but I
> must confess that I am not well versed in CFI directives.  I will appreciate 
> if 
> you could help me on this new version.
> 
>>
>> This stores a random x30 value on the stack, what is the purpose of this? 
>> Nothing can unwind
>> to here:
>>
>> +# Start using new stack
>> +stp x29, x30, [x0, -16]!
>> +mov sp, x0
>>
>> Also we no longer need split_stack_arg_pointer_used_p () or any code that 
>> uses it (functions
>> that don't have any arguments passed on the stack could omit the mov x11, 
>> sp).
> 
> Right, with the new strategy you proposed of doing a branch, this is indeed
> not really required.  I removed it from this new patch.
> 
>>
>> Wilco
>>


Re: [PATCH, vec-tails] Support loop epilogue vectorization

2016-11-14 Thread Yuri Rumyantsev
Richard,

In my previous patch I forgot to remove a couple of lines related to the aux field.
Here is the correct updated patch.

Thanks.
Yuri.

2016-11-14 15:51 GMT+03:00 Richard Biener :
> On Fri, 11 Nov 2016, Yuri Rumyantsev wrote:
>
>> Richard,
>>
>> I prepare updated 3 patch with passing additional argument to
>> vect_analyze_loop as you proposed (untested).
>>
>> You wrote:
>> Btw, I wonder if you can produce a single patch containing just
>> epilogue vectorization, that is combine patches 1-3 but rip out
>> changes only needed by later patches?
>>
>> Did you mean that I exclude all support for vectorization epilogues,
>> i.e. exclude from 2-nd patch all non-related changes
>> like
>>
>> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
>> index 11863af..32011c1 100644
>> --- a/gcc/tree-vect-loop.c
>> +++ b/gcc/tree-vect-loop.c
>> @@ -1120,6 +1120,12 @@ new_loop_vec_info (struct loop *loop)
>>LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
>>LOOP_VINFO_PEELING_FOR_NITER (res) = false;
>>LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
>> +  LOOP_VINFO_CAN_BE_MASKED (res) = false;
>> +  LOOP_VINFO_REQUIRED_MASKS (res) = 0;
>> +  LOOP_VINFO_COMBINE_EPILOGUE (res) = false;
>> +  LOOP_VINFO_MASK_EPILOGUE (res) = false;
>> +  LOOP_VINFO_NEED_MASKING (res) = false;
>> +  LOOP_VINFO_ORIG_LOOP_INFO (res) = NULL;
>
> Yes.
>
>> Did you mean also that new combined patch must be working patch, i.e.
>> can be integrated without other patches?
>
> Yes.
>
>> Could you please look at updated patch?
>
> Will do.
>
> Thanks,
> Richard.
>
>> Thanks.
>> Yuri.
>>
>> 2016-11-10 15:36 GMT+03:00 Richard Biener :
>> > On Thu, 10 Nov 2016, Richard Biener wrote:
>> >
>> >> On Tue, 8 Nov 2016, Yuri Rumyantsev wrote:
>> >>
>> >> > Richard,
>> >> >
>> >> > Here is updated 3 patch.
>> >> >
>> >> > I checked that all new tests related to epilogue vectorization passed 
>> >> > with it.
>> >> >
>> >> > Your comments will be appreciated.
>> >>
>> >> A lot better now.  Instead of the ->aux dance I now prefer to
>> >> pass the original loops loop_vinfo to vect_analyze_loop as
>> >> optional argument (if non-NULL we analyze the epilogue of that
>> >> loop_vinfo).  OTOH I remember we mainly use it to get at the
>> >> original vectorization factor?  So we can pass down an (optional)
>> >> forced vectorization factor as well?
>> >
>> > Btw, I wonder if you can produce a single patch containing just
>> > epilogue vectorization, that is combine patches 1-3 but rip out
>> > changes only needed by later patches?
>> >
>> > Thanks,
>> > Richard.
>> >
>> >> Richard.
>> >>
>> >> > 2016-11-08 15:38 GMT+03:00 Richard Biener :
>> >> > > On Thu, 3 Nov 2016, Yuri Rumyantsev wrote:
>> >> > >
>> >> > >> Hi Richard,
>> >> > >>
>> >> > >> I did not understand your last remark:
>> >> > >>
>> >> > >> > That is, here (and avoid the FOR_EACH_LOOP change):
>> >> > >> >
>> >> > >> > @@ -580,12 +586,21 @@ vectorize_loops (void)
>> >> > >> >   && dump_enabled_p ())
>> >> > >> >   dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
>> >> > >> >"loop vectorized\n");
>> >> > >> > -   vect_transform_loop (loop_vinfo);
>> >> > >> > +   new_loop = vect_transform_loop (loop_vinfo);
>> >> > >> > num_vectorized_loops++;
>> >> > >> >/* Now that the loop has been vectorized, allow it to be 
>> >> > >> > unrolled
>> >> > >> >   etc.  */
>> >> > >> >  loop->force_vectorize = false;
>> >> > >> >
>> >> > >> > +   /* Add new loop to a processing queue.  To make it easier
>> >> > >> > +  to match loop and its epilogue vectorization in dumps
>> >> > >> > +  put new loop as the next loop to process.  */
>> >> > >> > +   if (new_loop)
>> >> > >> > + {
>> >> > >> > +   loops.safe_insert (i + 1, new_loop->num);
>> >> > >> > +   vect_loops_num = number_of_loops (cfun);
>> >> > >> > + }
>> >> > >> >
>> >> > >> > simply dispatch to a vectorize_epilogue (loop_vinfo, new_loop)
>> >> > >> > function which will set up stuff properly (and also perform
>> >> > >> > the if-conversion of the epilogue there).
>> >> > >> >
>> >> > >> > That said, if we can get in non-masked epilogue vectorization
>> >> > >> > separately that would be great.
>> >> > >>
>> >> > >> Could you please clarify your proposal.
>> >> > >
>> >> > > When a loop was vectorized set things up to immediately vectorize
>> >> > > its epilogue, avoiding changing the loop iteration and avoiding
>> >> > > the re-use of ->aux.
>> >> > >
>> >> > > Richard.
>> >> > >
>> >> > >> Thanks.
>> >> > >> Yuri.
>> >> > >>
>> >> > >> 2016-11-02 15:27 GMT+03:00 Richard Biener :
>> >> > >> > On Tue, 1 Nov 2016, Yuri Rumyantsev wrote:
>> >> > >> >
>> >> > >> >> Hi All,
>> >> > >> >>
>> >> > >> >> I re-send all patches sent by Ilya earlier for review which 
>> >> > >> >> support
>> >> > >> >> vectorization of loop epilogues and loops with low trip count. We
>> >> > >> >> assume that the only patch - vec-tails-07-comb

Re: [libstdc++, testsuite] Add dg-require-thread-fence

2016-11-14 Thread Christophe Lyon
On 20 October 2016 at 19:40, Jonathan Wakely  wrote:
> On 20/10/16 10:33 -0700, Mike Stump wrote:
>>
>> On Oct 20, 2016, at 9:34 AM, Jonathan Wakely  wrote:
>>>
>>>
>>> On 20/10/16 09:26 -0700, Mike Stump wrote:

 On Oct 20, 2016, at 5:20 AM, Jonathan Wakely  wrote:
>
>
> I am considering leaving this in the ARM backend to force people to
> think what they want to do about thread safety with statics and C++
> on bare-metal systems.
>>>
>>>
>>> The quoting makes it look like those are my words, but I was quoting
>>> Ramana from https://gcc.gnu.org/ml/gcc-patches/2015-05/msg02751.html
>>>
 Not quite in the GNU spirit?  The port people should decide the best way
 to get as much functionality as possible and everything should just work, 
 no
 sharp edges.

 Forcing people to think sounds like a sharp edge?
>>>
>>>
>>> I'm inclined to agree, but we are talking about bare metal systems,
>>
>>
>> So?  gcc has been doing bare metal systems for more than 2 years now.  It
>> is pretty good at it.  All my primary targets today are themselves bare
>> metal systems (I test with newlib).
>>
>>> where there is no one-size-fits-all solution.
>>
>>
>> Configurations are like ice cream cones.  Everyone gets their flavor no
>> matter how weird or strange.  Putting nails in a cone because you don't know
>> if they like vanilla or chocolate isn't reasonable.  If you want, make two
>> flavors, and vend two, if you want to just do one, pick the flavor and vend
>> it.  Put an enum #define default_flavor vanilla, and you then have support
>> for any flavor you want.  Want to add a configure option for the flavor
>> select, add it.  You want to make a -mflavor=chocolate option, add it.  gcc
>> is literally littered with these things.
>
>
> Like I said, you can either build the library with
> -fno-threadsafe-statics or you can provide a definition of the missing
> symbol.
>
I gave this a try (using CXXFLAGS_FOR_TARGET=-fno-threadsafe-statics).
It seems to do the trick indeed: almost all tests now pass, the flag is added
to testcase compilation.

Among the 6 remaining failures, I noticed these two:
- experimental/type_erased_allocator/2.cc: still complains about the missing
__sync_synchronize. Does it need dg-require-thread-fence?

- abi/header_cxxabi.c complains because the option is not valid for C.
I can see the test is already skipped for other C++-only options: is it OK
if I submit a patch to skip it if -fno-threadsafe-statics is used?


I think I'm going to use this flag in validations from now on (target
arm-none-eabi
only, with default mode/cpu/fpu).

Thanks,

Christophe

>> Anything vended should just work.  If it doesn't, that's a bug that needs
>> fixing.  If a port person doesn't understand, we can educate them why _it
>> just works_, is a nice design philosophy; maybe it is new to them.
>
>
> Which is basically what I'm saying. Marking 3000 tests UNSUPPORTED to
> make some test results look clean is not a fix for anything.
>
>
>>> Choosing something that makes most of the library unusable will upset one
>>> group of people, and
>>> choosing something that adds overhead that could be avoided will upset
>>> another group.
>>
>>
>> No, this is a misunderstanding.  Users want things to just work, really.
>> Bosses often like it when things just work as well; so it's not just users.
>> Customers often like it as well.  Anyway, that's my experience.
>
>
>
> OK, I'll put it another way. Under no circumstances am I going to
> accept a patch that requires adding the same redundant directive to
> every single 'dg-do run' test in libstdc++-v3/testsuite/.
>
> Right now I don't care how or if the FAILs get fixed, but it won't be
> by individually marking every file as UNSUPPORTED.
>
>


Re: [patch, avr] Add flash size to device info and make wrap around default

2016-11-14 Thread Pitchumani Sivanupandi

Ping!

On Thursday 10 November 2016 01:53 PM, Pitchumani Sivanupandi wrote:

On Wednesday 09 November 2016 08:05 PM, Georg-Johann Lay wrote:

On 09.11.2016 10:14, Pitchumani Sivanupandi wrote:

On Tuesday 08 November 2016 02:57 PM, Georg-Johann Lay wrote:

On 08.11.2016 08:08, Pitchumani Sivanupandi wrote:
I have updated patch to include the flash size as well. Took that 
info from
device headers (it was fed into crt's device information note 
section also).


The new option would render -mn-flash superfluous, but we should 
keep it for

backward compatibility.

Ok.

Shouldn't link_pmem_wrap then be removed from link_relax, i.e. from
LINK_RELAX_SPEC?  And what happens if relaxation is off?

Yes. Removed link_pmem_wrap from link_relax.
Disabling relaxation doesn't change -mpmem-wrap-around behavior.

flashsize-and-wrap-around.patch


diff --git a/gcc/config/avr/avr-mcus.def 
b/gcc/config/avr/avr-mcus.def

index 6bcc6ff..9d4aa1a 100644



 /*



 /* Classic, > 8K, <= 64K.  */
-AVR_MCU ("avr3", ARCH_AVR3, AVR_ISA_NONE, 
NULL,0x0060, 0x0, 1)
-AVR_MCU ("at43usb355",   ARCH_AVR3, AVR_ISA_NONE, 
"__AVR_AT43USB355__",0x0060, 0x0, 1)
-AVR_MCU ("at76c711", ARCH_AVR3, AVR_ISA_NONE, 
"__AVR_AT76C711__",  0x0060, 0x0, 1)
+AVR_MCU ("avr3", ARCH_AVR3, AVR_ISA_NONE, 
NULL,0x0060, 0x0, 1, 0x6000)
+AVR_MCU ("at43usb355",   ARCH_AVR3, AVR_ISA_NONE, 
"__AVR_AT43USB355__",0x0060, 0x0, 1, 0x6000)
+AVR_MCU ("at76c711", ARCH_AVR3, AVR_ISA_NONE, 
"__AVR_AT76C711__",  0x0060, 0x0, 1, 0x4000)
+AVR_MCU ("at43usb320",   ARCH_AVR3, AVR_ISA_NONE, 
"__AVR_AT43USB320__",0x0060, 0x0, 1, 0x1)

 /* Classic, == 128K.  */
-AVR_MCU ("avr31",ARCH_AVR31, AVR_ERRATA_SKIP, 
NULL,0x0060, 0x0, 2)
-AVR_MCU ("atmega103",ARCH_AVR31, AVR_ERRATA_SKIP, 
"__AVR_ATmega103__", 0x0060, 0x0, 2)
-AVR_MCU ("at43usb320",   ARCH_AVR31, AVR_ISA_NONE, 
"__AVR_AT43USB320__",   0x0060, 0x0, 2)
+AVR_MCU ("avr31",ARCH_AVR31, AVR_ERRATA_SKIP, 
NULL,0x0060, 0x0, 2, 0x2)
+AVR_MCU ("atmega103",ARCH_AVR31, AVR_ERRATA_SKIP, 
"__AVR_ATmega103__", 0x0060, 0x0, 2, 0x2)

 /* Classic + MOVW + JMP/CALL.  */


If at43usb320 is in the wrong multilib, then this should be handled 
as separate issue / patch together with its own PR. Sorry for the 
confusion.  I just noticed that some fields don't match...


It is not even clear to me from the data sheet if avr3 is the correct 
multilib or perhaps avr35 (if it supports MOVW) or even avr5 (if it 
also has MUL) as there is no reference to the exact instruction set 
-- Atmochip will know.


Moreover, such a change should be sync'ed with avr-libc as all 
multilib stuff is hand-wired there: no use of --print-foo meta 
information retrieval by avr-libc :-((


I filed PR78275 and https://savannah.nongnu.org/bugs/index.php?49565 
for this one.


That's better. I've attached the updated patch. If OK, could someone 
commit please?


I'll see if I can find some more info for AT43USB320.

Regards,
Pitchumani





[Patch, fortran, RFC] Add warning for missing location information

2016-11-14 Thread Thomas Koenig

Hello world,

the attached patch runs through gfortran's AST to check for missing
location information.

If something is found, a warning is issued.  I chose not to issue an
error because missing location information does not make a program
invalid.  The patch would cause testsuite regressions for changes
which cause missing location information, which is a good thing because
we want to catch missing locations as soon as they are (potentially)
introduced.

The checking is turned off if CHECKING_P is not defined, so it should
not slow down release compilers.

I used DK_NOTE here, but that is open to discussion.  Maybe it would be
better to define another warning class for this?

Regression-tested "as is".  I also tested that a note is actually
issued by temporarily re-introducing a bug that has been fixed in
the meantime.

What do you think?  OK for trunk, or should something be done
differently?

Regards

Thomas


Re: [PATCH, vec-tails] Support loop epilogue vectorization

2016-11-14 Thread Richard Biener
On Mon, 14 Nov 2016, Yuri Rumyantsev wrote:

> Richard,
> 
> In my previous patch I forgot to remove a couple of lines related to the aux field.
> Here is the correct updated patch.

Yeah, I noticed.  This patch would be ok for trunk (together with
necessary parts from 1 and 2) if all not required parts are removed
(and you'd add the testcases covering non-masked tail vect).

Thus, can you please produce a single complete patch containing only
non-masked epilogue vectorization?

Thanks,
Richard.

> Thanks.
> Yuri.
> 
> 2016-11-14 15:51 GMT+03:00 Richard Biener :
> > On Fri, 11 Nov 2016, Yuri Rumyantsev wrote:
> >
> >> Richard,
> >>
> >> I prepare updated 3 patch with passing additional argument to
> >> vect_analyze_loop as you proposed (untested).
> >>
> >> You wrote:
> >> Btw, I wonder if you can produce a single patch containing just
> >> epilogue vectorization, that is combine patches 1-3 but rip out
> >> changes only needed by later patches?
> >>
> >> Did you mean that I exclude all support for vectorization epilogues,
> >> i.e. exclude from 2-nd patch all non-related changes
> >> like
> >>
> >> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> >> index 11863af..32011c1 100644
> >> --- a/gcc/tree-vect-loop.c
> >> +++ b/gcc/tree-vect-loop.c
> >> @@ -1120,6 +1120,12 @@ new_loop_vec_info (struct loop *loop)
> >>LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
> >>LOOP_VINFO_PEELING_FOR_NITER (res) = false;
> >>LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
> >> +  LOOP_VINFO_CAN_BE_MASKED (res) = false;
> >> +  LOOP_VINFO_REQUIRED_MASKS (res) = 0;
> >> +  LOOP_VINFO_COMBINE_EPILOGUE (res) = false;
> >> +  LOOP_VINFO_MASK_EPILOGUE (res) = false;
> >> +  LOOP_VINFO_NEED_MASKING (res) = false;
> >> +  LOOP_VINFO_ORIG_LOOP_INFO (res) = NULL;
> >
> > Yes.
> >
> >> Did you mean also that new combined patch must be working patch, i.e.
> >> can be integrated without other patches?
> >
> > Yes.
> >
> >> Could you please look at updated patch?
> >
> > Will do.
> >
> > Thanks,
> > Richard.
> >
> >> Thanks.
> >> Yuri.
> >>
> >> 2016-11-10 15:36 GMT+03:00 Richard Biener :
> >> > On Thu, 10 Nov 2016, Richard Biener wrote:
> >> >
> >> >> On Tue, 8 Nov 2016, Yuri Rumyantsev wrote:
> >> >>
> >> >> > Richard,
> >> >> >
> >> >> > Here is updated 3 patch.
> >> >> >
> >> >> > I checked that all new tests related to epilogue vectorization passed 
> >> >> > with it.
> >> >> >
> >> >> > Your comments will be appreciated.
> >> >>
> >> >> A lot better now.  Instead of the ->aux dance I now prefer to
> >> >> pass the original loops loop_vinfo to vect_analyze_loop as
> >> >> optional argument (if non-NULL we analyze the epilogue of that
> >> >> loop_vinfo).  OTOH I remember we mainly use it to get at the
> >> >> original vectorization factor?  So we can pass down an (optional)
> >> >> forced vectorization factor as well?
> >> >
> >> > Btw, I wonder if you can produce a single patch containing just
> >> > epilogue vectorization, that is combine patches 1-3 but rip out
> >> > changes only needed by later patches?
> >> >
> >> > Thanks,
> >> > Richard.
> >> >
> >> >> Richard.
> >> >>
> >> >> > 2016-11-08 15:38 GMT+03:00 Richard Biener :
> >> >> > > On Thu, 3 Nov 2016, Yuri Rumyantsev wrote:
> >> >> > >
> >> >> > >> Hi Richard,
> >> >> > >>
> >> >> > >> I did not understand your last remark:
> >> >> > >>
> >> >> > >> > That is, here (and avoid the FOR_EACH_LOOP change):
> >> >> > >> >
> >> >> > >> > @@ -580,12 +586,21 @@ vectorize_loops (void)
> >> >> > >> >   && dump_enabled_p ())
> >> >> > >> >   dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, 
> >> >> > >> > vect_location,
> >> >> > >> >"loop vectorized\n");
> >> >> > >> > -   vect_transform_loop (loop_vinfo);
> >> >> > >> > +   new_loop = vect_transform_loop (loop_vinfo);
> >> >> > >> > num_vectorized_loops++;
> >> >> > >> >/* Now that the loop has been vectorized, allow it to be 
> >> >> > >> > unrolled
> >> >> > >> >   etc.  */
> >> >> > >> >  loop->force_vectorize = false;
> >> >> > >> >
> >> >> > >> > +   /* Add new loop to a processing queue.  To make it easier
> >> >> > >> > +  to match loop and its epilogue vectorization in dumps
> >> >> > >> > +  put new loop as the next loop to process.  */
> >> >> > >> > +   if (new_loop)
> >> >> > >> > + {
> >> >> > >> > +   loops.safe_insert (i + 1, new_loop->num);
> >> >> > >> > +   vect_loops_num = number_of_loops (cfun);
> >> >> > >> > + }
> >> >> > >> >
> >> >> > >> > simply dispatch to a vectorize_epilogue (loop_vinfo, new_loop)
> >> >> > >> > function which will set up stuff properly (and also perform
> >> >> > >> > the if-conversion of the epilogue there).
> >> >> > >> >
> >> >> > >> > That said, if we can get in non-masked epilogue vectorization
> >> >> > >> > separately that would be great.
> >> >> > >>
> >> >> > >> Could you please clarify your proposal.
> >> >> > >
> >> >> > > When a loop was vecto

Re: [PATCH 00/11] more rtx_insn * stuff

2016-11-14 Thread Bernd Schmidt

On 11/14/2016 09:09 AM, tbsaunde+...@tbsaunde.org wrote:

From: Trevor Saunders 

Hi,

Basically $subject which gets rid of a few more casts overall.

I meant to get this out a little while back, but life got busy, and I didn't
read the status announcement properly, so virtually working from Hawaii for
now. patches individually built and regtested on x86_64-linux-gnu, and series
run through config-list.mk, ok?


Ok for all except #3 and #11.

For #3, I just don't like increasing indentation like that, I prefer to 
just declare the variable earlier.

#11 does unexplained things with dyn_casts (why not as_a?) and templates.


Bernd


Re: [Patch, fortran, RFC] Add warning for missing location information

2016-11-14 Thread Thomas Koenig



the attached patch runs through gfortran's AST to check for missing
location information.


... this time with attachment.

2016-11-14  Thomas Koenig  

PR fortran/78226
* error.c (gfc_warning_internal):  New function.
* frontend-passes.c (CHECK_LOCUS):  New macro.
(gfc_run_passes):  Call check_locus if CHECK_LOCUS
is defined.
(check_locus_code):  New function.
(check_locus_expr):  New function.
(check_locus):  New function.
* gfortran.h:  Add prototype for gfc_warning_internal.


Index: error.c
===
--- error.c	(Revision 242335)
+++ error.c	(Arbeitskopie)
@@ -1160,7 +1160,25 @@ gfc_warning_now (int opt, const char *gmsgid, ...)
   return ret;
 }
 
+/* Internal warning, do not buffer.  */
 
+bool
+gfc_warning_internal (int opt, const char *gmsgid, ...)
+{
+  va_list argp;
+  diagnostic_info diagnostic;
+  rich_location rich_loc (line_table, UNKNOWN_LOCATION);
+  bool ret;
+
+  va_start (argp, gmsgid);
+  diagnostic_set_info (&diagnostic, gmsgid, &argp, &rich_loc,
+		   DK_NOTE);
+  diagnostic.option_index = opt;
+  ret = report_diagnostic (&diagnostic);
+  va_end (argp);
+  return ret;
+}
+
 /* Immediate error (i.e. do not buffer).  */
 
 void
Index: frontend-passes.c
===
--- frontend-passes.c	(Revision 242335)
+++ frontend-passes.c	(Arbeitskopie)
@@ -27,6 +27,14 @@ along with GCC; see the file COPYING3.  If not see
 #include "constructor.h"
 #include "intrinsic.h"
 
+/* Conditional compilations.  */
+
+#ifdef CHECKING_P
+#define CHECK_LOCUS 1
+#else
+#undef CHECK_LOCUS
+#endif
+
 /* Forward declarations.  */
 
 static void strip_function_call (gfc_expr *);
@@ -48,6 +56,10 @@ static gfc_code * create_do_loop (gfc_expr *, gfc_
   locus *, gfc_namespace *,
   char *vname=NULL);
 
+#ifdef CHECK_LOCUS
+static void check_locus (gfc_namespace *);
+#endif
+
 /* How deep we are inside an argument list.  */
 
 static int count_arglist;
@@ -127,6 +139,10 @@ gfc_run_passes (gfc_namespace *ns)
   doloop_list.release ();
   int w, e;
 
+#ifdef CHECK_LOCUS
+  check_locus (ns);
+#endif
+
   if (flag_frontend_optimize)
 {
   optimize_namespace (ns);
@@ -145,6 +161,53 @@ gfc_run_passes (gfc_namespace *ns)
 realloc_strings (ns);
 }
 
+#ifdef CHECK_LOCUS
+
+/* Callback function: Warn if there is no location information in a
+   statement.  */
+
+static int
+check_locus_code (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED,
+		  void *data ATTRIBUTE_UNUSED)
+{
+  current_code = c;
+  if (c && *c && (((*c)->loc.nextc == NULL) || ((*c)->loc.lb == NULL)))
+gfc_warning_internal (0, "No location in statement statement");
+
+  return 0;
+}
+
+/* Callback function: Warn if there is no location information in an
+   expression.  */
+
+static int
+check_locus_expr (gfc_expr **e, int *walk_subtrees ATTRIBUTE_UNUSED,
+		  void *data ATTRIBUTE_UNUSED)
+{
+
+  if (e && *e && (((*e)->where.nextc == NULL || (*e)->where.lb == NULL)))
+gfc_warning_internal (0, "No location in expression near %L",
+			  &((*current_code)->loc));
+  return 0;
+
+}
+
+/* Run check for missing location information.  */
+
+static void
+check_locus (gfc_namespace *ns)
+{
+  gfc_code_walker (&ns->code, check_locus_code, check_locus_expr, NULL);
+
+  for (ns = ns->contained; ns; ns = ns->sibling)
+{
+  if (ns->code == NULL || ns->code->op != EXEC_BLOCK)
+	check_locus (ns);
+}
+}
+
+#endif
+
 /* Callback for each gfc_code node invoked from check_realloc_strings.
For an allocatable LHS string which also appears as a variable on
the RHS, replace
Index: gfortran.h
===
--- gfortran.h	(Revision 242335)
+++ gfortran.h	(Arbeitskopie)
@@ -2778,6 +2778,7 @@ const char *gfc_print_wide_char (gfc_char_t);
 
 bool gfc_warning (int opt, const char *, ...) ATTRIBUTE_GCC_GFC(2,3);
 bool gfc_warning_now (int opt, const char *, ...) ATTRIBUTE_GCC_GFC(2,3);
+bool gfc_warning_internal (int opt, const char *, ...) ATTRIBUTE_GCC_GFC(2,3);
 bool gfc_warning_now_at (location_t loc, int opt, const char *gmsgid, ...)
   ATTRIBUTE_GCC_GFC(3,4);
 


Re: [PATCH] Support no newline in print_gimple_stmt

2016-11-14 Thread Richard Biener
On Fri, Nov 11, 2016 at 4:07 PM, Martin Liška  wrote:
> On 11/11/2016 01:10 PM, Richard Biener wrote:
>> On Thu, Nov 10, 2016 at 4:36 PM, Martin Liška  wrote:
>>> I've just noticed that tree-ssa-dse wrongly prints a new line to dump file.
>>> For the next stage1, I'll go through usages of print_gimple_stmt and remove
>>> extra new lines like:
>>>
>>> gcc/auto-profile.c:  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
>>> gcc/auto-profile.c-  fprintf (dump_file, "\n");
>>>
>>> Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
>>>
>>> Ready to be installed?
>>
>> Err, why not just remove the excess newlines (and drop the ' quotes)?
>
> OK, let's do it simple ;) There's a new output:
>
>   Deleted dead store: *p_2(D) = 0;
>
> Ready to install the patch after it finishes regression tests?

Ok.

Richard.

> Thanks,
> Martin
>
>>
>> Richard.
>>
>>> Martin
>


Re: [7/9][AArch64, libgcc] Let AArch64 use customized unwinder file

2016-11-14 Thread Jiong Wang

On 11/11/16 22:12, Joseph Myers wrote:

On Fri, 11 Nov 2016, Jiong Wang wrote:


There are two ways of introducing this AArch64 support:
   * Introducing a few target macros so we can customize functions like
 uw_init_context, uw_install_context etc.
   * Use target private unwind-dw2 implementation, i.e duplicate the generic
 unwind-dw2.c into target config directory and use it instead of generic
one.
 This is currently used by IA64 and CR16.

I am not sure which approach is the convention in libgcc, Ian, any comments on
this?

Although as you note duplication has been used before, I think it should
be strongly discouraged; duplicated files are unlikely to be kept up to
date with relevant changes to the main file.


Hi Joseph,


  The changes AArch64 needs to make on top of the generic unwind-dw2.c are at:

 https://gcc.gnu.org/ml/gcc-patches/2016-11/msg01167.html

  If I don't duplicate unwind-dw2.c, then I need to guard those changes with
something like __aarch64__ or introduce several target macros.  It looks to me
that only the hunk that supports AArch64 DWARF operations is worth a target
macro, something like MD_DW_OP_HANDLER; the other changes are quite scattered,
for example the field extension on "struct _Unwind_Context" and the relaxation
of the assertion in uw_install_context_1.

  Any comments on this?

  Thanks.

Regards,
Jiong




[PATCH, GCC/ARM] Make arm_feature_set agree with type of FL_* macros

2016-11-14 Thread Thomas Preudhomme

Hi,

Currently arm_feature_set is defined in gcc/config/arm/arm-flags.h as an array of 
2 unsigned long. However, the flags stored in these two entries are (signed) 
int, being combinations of bits set via expressions of the form 1 << bitno. This 
creates 3 issues:


1) undefined behavior when setting the msb (1 << 31)
2) undefined behavior when storing a flag with msb set (negative int) into one 
of the unsigned array entries (positive int)

3) waste of space since the top 32 bits of each entry are not used

This patch changes the definition of the FL_* macros to be unsigned int by using the 
form 1U << bitno instead and changes the definition of arm_feature_set to be an 
array of 2 unsigned (int) entries.


Bootstrapped on arm-linux-gnueabihf targeting Thumb-2 state.

Is this ok for trunk?

Best regards,

Thomas
diff --git a/gcc/config/arm/arm-flags.h b/gcc/config/arm/arm-flags.h
index 9a5991aa07a229a7741e526c2876e7e0e4749db4..136a36e403dd3207deb91adf8c36e568bc08fd9e 100644
--- a/gcc/config/arm/arm-flags.h
+++ b/gcc/config/arm/arm-flags.h
@@ -25,49 +25,49 @@
 /* Flags used to identify the presence of processor capabilities.  */
 
 /* Bit values used to identify processor capabilities.  */
-#define FL_NONE	  (0)	  /* No flags.  */
-#define FL_ANY	  (0xffffffff)/* All flags.  */
-#define FL_CO_PROC(1 << 0)/* Has external co-processor bus */
-#define FL_ARCH3M (1 << 1)/* Extended multiply */
-#define FL_MODE26 (1 << 2)/* 26-bit mode support */
-#define FL_MODE32 (1 << 3)/* 32-bit mode support */
-#define FL_ARCH4  (1 << 4)/* Architecture rel 4 */
-#define FL_ARCH5  (1 << 5)/* Architecture rel 5 */
-#define FL_THUMB  (1 << 6)/* Thumb aware */
-#define FL_LDSCHED(1 << 7)	  /* Load scheduling necessary */
-#define FL_STRONG (1 << 8)	  /* StrongARM */
-#define FL_ARCH5E (1 << 9)/* DSP extensions to v5 */
-#define FL_XSCALE (1 << 10)	  /* XScale */
-/* spare	  (1 << 11)	*/
-#define FL_ARCH6  (1 << 12)   /* Architecture rel 6.  Adds
+#define FL_NONE	  (0U)	  /* No flags.  */
+#define FL_ANY	  (0xffffffffU)   /* All flags.  */
+#define FL_CO_PROC(1U << 0)   /* Has external co-processor bus */
+#define FL_ARCH3M (1U << 1)   /* Extended multiply */
+#define FL_MODE26 (1U << 2)   /* 26-bit mode support */
+#define FL_MODE32 (1U << 3)   /* 32-bit mode support */
+#define FL_ARCH4  (1U << 4)   /* Architecture rel 4 */
+#define FL_ARCH5  (1U << 5)   /* Architecture rel 5 */
+#define FL_THUMB  (1U << 6)   /* Thumb aware */
+#define FL_LDSCHED(1U << 7)	  /* Load scheduling necessary */
+#define FL_STRONG (1U << 8)	  /* StrongARM */
+#define FL_ARCH5E (1U << 9)   /* DSP extensions to v5 */
+#define FL_XSCALE (1U << 10)  /* XScale */
+/* spare	  (1U << 11)*/
+#define FL_ARCH6  (1U << 12)  /* Architecture rel 6.  Adds
 	 media instructions.  */
-#define FL_VFPV2  (1 << 13)   /* Vector Floating Point V2.  */
-#define FL_WBUF	  (1 << 14)	  /* Schedule for write buffer ops.
+#define FL_VFPV2  (1U << 13)  /* Vector Floating Point V2.  */
+#define FL_WBUF	  (1U << 14)  /* Schedule for write buffer ops.
 	 Note: ARM6 & 7 derivatives only.  */
-#define FL_ARCH6K (1 << 15)   /* Architecture rel 6 K extensions.  */
-#define FL_THUMB2 (1 << 16)	  /* Thumb-2.  */
-#define FL_NOTM	  (1 << 17)	  /* Instructions not present in the 'M'
+#define FL_ARCH6K (1U << 15)  /* Architecture rel 6 K extensions.  */
+#define FL_THUMB2 (1U << 16)  /* Thumb-2.  */
+#define FL_NOTM	  (1U << 17)  /* Instructions not present in the 'M'
 	 profile.  */
-#define FL_THUMB_DIV  (1 << 18)	  /* Hardware divide (Thumb mode).  */
-#define FL_VFPV3  (1 << 19)   /* Vector Floating Point V3.  */
-#define FL_NEON   (1 << 20)   /* Neon instructions.  */
-#define FL_ARCH7EM(1 << 21)	  /* Instructions present in the ARMv7E-M
+#define FL_THUMB_DIV  (1U << 18)  /* Hardware divide (Thumb mode).  */
+#define FL_VFPV3  (1U << 19)  /* Vector Floating Point V3.  */
+#define FL_NEON   (1U << 20)  /* Neon instructions.  */
+#define FL_ARCH7EM(1U << 21)  /* Instructions present in the ARMv7E-M
 	 architecture.  */
-#define FL_ARCH7  (1 << 22)   /* Architecture 7.  */
-#define FL_ARM_DIV(1 << 23)	  /* Hardware divide (ARM mode).  */
-#define FL_ARCH8  (1 << 24)   /* Architecture 8.  */
-#define FL_CRC32  (1 << 25)	  /* ARMv8 CRC32 instructions.  */
+#define FL_ARCH7  (1U << 22)  /* Architecture 7.  */
+#define FL_ARM_DIV(1U << 23)  /* Hardware divide (ARM mode).  */
+#define FL_ARCH8  (1U << 24)  /* Architecture 8.  */
+#define FL_CRC32  (1U << 25)  /* ARMv8 CRC32 instructions.  */
 
-#define FL_SMALLMUL   (1 << 26)  

Re: [PATCH, GCC/ARM] Make arm_feature_set agree with type of FL_* macros

2016-11-14 Thread Thomas Preudhomme
I forgot to mention that this patch is needed for the optional -mthumb patch [1] 
to bootstrap.


[1] https://gcc.gnu.org/ml/gcc-patches/2016-11/msg00735.html

Best regards,

Thomas

On 14/11/16 14:07, Thomas Preudhomme wrote:

Hi,

Currently arm_feature_set is defined in gcc/config/arm/arm-flags as an array of
2 unsigned long. However, the flags stored in these two entries are (signed)
int, being combinations of bits set via expression of the form 1 << bitno. This
creates 3 issues:

1) undefined behavior when setting the msb (1 << 31)
2) undefined behavior when storing a flag with msb set (negative int) into one
of the unsigned array entries (positive int)
3) waste of space since the top 32 bits of each entry is not used

This patch changes the definition of FL_* macro to be unsigned int by using the
form 1U << bitno instead and changes the definition of arm_feature_set to be an
array of 2 unsigned (int) entries.

Bootstrapped on arm-linux-gnueabihf targeting Thumb-2 state.

Is this ok for trunk?

Best regards,

Thomas


Re: [fixincludes] Fix macOS 10.12 <AvailabilityInternal.h> and <os/trace.h> (PR sanitizer/78267)

2016-11-14 Thread Rainer Orth
Hi Jack,

>  > On darwin15, the proposed patch is insufficient to restore the bootstrap
>  > (after running genfixes in the fixincludes directory) unless I also apply
>  > the previously proposed change...
>
>  no wonder: it's only been tested on darwin16. Care to explain what
>  error you're seeing?
>
> The failure that I see on darwin15 using your proposed patches and executing
> genfixes in fixincludes before the build is...

the actual errors are ...

> In file included from
> ../../../../gcc-7-20161113/libsanitizer/sanitizer_common/sanitizer_mac.cc:39:0:
> /sw/src/fink.build/gcc7-7.0.0-1/darwin_objdir/gcc/include-fixed/os/trace.h:56:47:
> error: attributes are not allowed on a function-definition
> _os_trace_verify_printf(const char *msg, ...) __attribute__((format(printf, 
> 1, 2)))
> ^

I've added a new fix to just remove that attribute.  One could try and
add a declaration with the attribute if need be, but I've avoided that
trouble for now.

> /sw/src/fink.build/gcc7-7.0.0-1/darwin_objdir/gcc/include-fixed/os/trace.h:715:109:
> error: ‘os_trace_payload_t’ has not been declared
> _os_trace_with_buffer(void *dso, const char *message, uint8_t type, const void
> *buffer, size_t buffer_size, os_trace_payload_t payload);
> ^

This one isn't caught so far because 10.11 has

__OSX_AVAILABLE(10.10) __IOS_AVAILABLE(8.0) __WATCHOS_AVAILABLE(1.0) 
__TVOS_AVAILABLE(9.0)

instead of

__API_AVAILABLE(macosx(10.12), ios(10.0), watchos(3.0), tvos(10.0))

I've updated the corresponding fix to deal with this variation and
augmented the testcase accordingly.

Hopefully the new patch works for you.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


# HG changeset patch
# Parent  c9e06f02a0d86f769e4c4720ddfc0938c76bcb20
Fix macOS 10.12 <AvailabilityInternal.h> and <os/trace.h> (PR sanitizer/78267)

diff --git a/fixincludes/inclhack.def b/fixincludes/inclhack.def
--- a/fixincludes/inclhack.def
+++ b/fixincludes/inclhack.def
@@ -1338,6 +1338,32 @@ fix = {
 };
 
 /*
+ *  macOS 10.12 <AvailabilityInternal.h> uses __attribute__((availability))
+ *  unconditionally.
+ */
+fix = {
+hackname  = darwin_availabilityinternal;
+mach  = "*-*-darwin*";
+files = AvailabilityInternal.h;
+select= "#define[ \t]+(__API_[ADU]\\([^)]*\\)).*";
+c_fix = format;
+c_fix_arg = <<- _EOFix_
+	#if defined(__has_attribute)
+	  #if __has_attribute(availability)
+	%0
+	  #else
+	#define %1
+	  #endif
+	#else
+	#define %1
+	#endif
+	_EOFix_;
+
+test_text = "#define __API_A(x) __attribute__((availability(__API_AVAILABLE_PLATFORM_##x)))\n"
+		"#define __API_D(msg,x) __attribute__((availability(__API_DEPRECATED_PLATFORM_##x,message=msg)))";
+};
+
+/*
  *  For the AAB_darwin7_9_long_double_funcs fix to be useful,
  *  you have to not use "" includes.
  */
@@ -1410,6 +1436,62 @@ fix = {
 };
 
 /*
+ *  Mac OS X 10.11 <os/trace.h> uses attribute on function definition.
+ */
+fix = {
+  hackname  = darwin_os_trace_1;
+  mach  = "*-*-darwin*";
+  files = os/trace.h;
+  select= "^(_os_trace_verify_printf.*) (__attribute__.*)";
+  c_fix = format;
+  c_fix_arg = "%1";
+  test_text = "_os_trace_verify_printf(const char *msg, ...) __attribute__((format(printf, 1, 2)))";
+};
+
+/*
+ *  Mac OS X 10.1[012] <os/trace.h> os_trace_payload_t typedef uses Blocks
+ *  extension without guard.
+ */
+fix = {
+  hackname  = darwin_os_trace_2;
+  mach  = "*-*-darwin*";
+  files = os/trace.h;
+  select= "typedef.*\\^os_trace_payload_t.*";
+  c_fix = format;
+  c_fix_arg = "#if __BLOCKS__\n%0\n#endif";
+  test_text = "typedef void (^os_trace_payload_t)(xpc_object_t xdict);";
+};
+
+/*
+ *  In Mac OS X 10.1[012] <os/trace.h>, need to guard users of
+ *  os_trace_payload_t typedef, too.
+ */
+fix = {
+  hackname  = darwin_os_trace_3;
+  mach  = "*-*-darwin*";
+  files = os/trace.h;
+  select= <<- _EOSelect_
+	__(API|OSX)_.*
+	OS_EXPORT.*
+	.*
+	_os_trace.*os_trace_payload_t payload);
+	_EOSelect_;
+  c_fix = format;
+  c_fix_arg = "#if __BLOCKS__\n%0\n#endif";
+  test_text = <<- _EOText_
+	__API_AVAILABLE(macosx(10.10), ios(8.0), watchos(2.0), tvos(8.0))
+	OS_EXPORT OS_NOTHROW OS_NOT_TAIL_CALLED
+	void
+	_os_trace_with_buffer(void *dso, const char *message, uint8_t type, const void *buffer, size_t buffer_size, os_trace_payload_t payload);
+
+	__OSX_AVAILABLE_STARTING(__MAC_10_12, __IPHONE_10_0)
+	OS_EXPORT OS_NOTHROW
+	void
+	_os_trace_internal(void *dso, uint8_t type, const char *format, const uint8_t *buf, size_t buf_size, os_trace_payload_t payload);
+	_EOText_;
+};
+
+/*
  *  __private_extern__ doesn't exist in FSF GCC.  Even if it did,
  *  why would you ever put it in a system header file?
  */
@@ -2638,7 +2720,6 @@ fix = {
 c-fix-arg = "#  define	UINT_%164_MAX	__UINT64_MAX__";
 test-text = "#  define   UINT_FAST64_MAXULLONG_MAX\n"
 		"#  define   UINT_LEAST64_MAXULLONG_MAX\n";
-	_EOFix_;
 };
 
 /*
diff --gi

Re: [PATCH][ARM] Improve max_insns_skipped logic

2016-11-14 Thread Wilco Dijkstra
Wilco Dijkstra wrote:
> Richard Earnshaw wrote:
> On 10/11/16 17:19, Wilco Dijkstra wrote:

> Long conditional sequences are slow on modern cores - the value 6 for
> max_insns_skipped is a few decades out of date as it was meant for ARM2!
> Even with -Os the performance loss for larger values is not worth the
> small codesize gain (there are many better options to reduce codesize
> that actually improve performance at the same time). So using the same
> code generation heuristics for ARM and Thumb-2 is a good idea.

A simple codesize comparison on CSiBE shows using 4 rather than 6 for
max_insns_skipped is just 0.07% larger on ARM with -Os. So it's not
obvious that increasing max_insns_skipped in -Os is a useful codesize
optimization...

Wilco


Re: [PATCH 3/9] Introduce emit_status::ensure_regno_capacity

2016-11-14 Thread Bernd Schmidt

On 11/11/2016 10:15 PM, David Malcolm wrote:

Link to earlier version of the patch:
  https://gcc.gnu.org/ml/gcc-patches/2016-10/msg00278.html


Isn't this the same one?


Bernd



Re: [fixincludes] Fix macOS 10.12 <AvailabilityInternal.h> and <os/trace.h> (PR sanitizer/78267)

2016-11-14 Thread Rainer Orth
Hi Jack,

> In case it helps, attached is a diff of the os/trace.h header from OS X 10.11 
> vs that
> from macOS 10.12. It looks like the context of the changes that you are 
> trying to
> apply into are different between the two OS releases and are the cause of the 
> build
> failures on darwin15.

as I wrote, there are two issues, one a slightly different context, the
other a broken function definition (which is gone in Darwin 16), both of
which should be fixed by my updated patch.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH][AArch64] Separate shrink wrapping hooks implementation

2016-11-14 Thread Kyrill Tkachov


On 11/11/16 15:31, Kyrill Tkachov wrote:


On 11/11/16 10:17, Kyrill Tkachov wrote:


On 10/11/16 23:39, Segher Boessenkool wrote:

On Thu, Nov 10, 2016 at 02:42:24PM -0800, Andrew Pinski wrote:

On Thu, Nov 10, 2016 at 6:25 AM, Kyrill Tkachov

I ran SPEC2006 on a Cortex-A72. Overall scores were neutral but there were
some interesting swings.
458.sjeng +1.45%
471.omnetpp   +2.19%
445.gobmk -2.01%

On SPECFP:
453.povray+7.00%


Wow, this looks really good.  Thank you for implementing this.  If I
get some time I am going to try it out on other processors than A72
but I doubt I have time any time soon.

I'd love to hear what causes the slowdown for gobmk as well, btw.


I haven't yet gotten a direct answer for that (through performance analysis 
tools)
but I have noticed that load/store pairs are not generated as aggressively as I 
hoped.
They are being merged by the sched fusion pass and peepholes (which runs after 
this)
but it still misses cases. I've hacked the SWS hooks to generate pairs 
explicitly and that
increases the number of pairs and helps code size to boot. It complicates the 
logic of
the hooks a bit but not too much.

I'll make those changes and re-benchmark, hopefully that
will help performance.



And here's a version that explicitly emits pairs. I've looked at assembly 
codegen on SPEC2006
and it generates quite a few more LDP/STP pairs than the original version.
I kicked off benchmarks over the weekend to see the effect.
Andrew, if you want to try it out (more benchmarking and testing always 
welcome) this is the
one to try.



And I discovered over the weekend that gamess and wrf have validation errors.
This version runs correctly.
SPECINT results were fine though and there is even a small overall gain due to
sjeng and omnetpp. However, gobmk still has the regression.
I'll rerun SPECFP with this patch (it's really just a small bugfix over the 
previous version)
and get on with analysing gobmk.

Thanks,
Kyrill

2016-11-11  Kyrylo Tkachov  

* config/aarch64/aarch64.h (machine_function): Add
reg_is_wrapped_separately field.
* config/aarch64/aarch64.c (emit_set_insn): Change return type to
rtx_insn *.
(aarch64_save_callee_saves): Don't save registers that are wrapped
separately.
(aarch64_restore_callee_saves): Don't restore registers that are
wrapped separately.
(offset_9bit_signed_unscaled_p, offset_12bit_unsigned_scaled_p,
aarch64_offset_7bit_signed_scaled_p): Move earlier in the file.
(aarch64_get_separate_components): New function.
(aarch64_get_next_set_bit): Likewise.
(aarch64_components_for_bb): Likewise.
(aarch64_disqualify_components): Likewise.
(aarch64_emit_prologue_components): Likewise.
(aarch64_emit_epilogue_components): Likewise.
(aarch64_set_handled_components): Likewise.
(TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS,
TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB,
TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS,
TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS,
TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS,
TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS): Define.

commit 06ac3c30d8aa38781ee9019e60a5fcf727b85231
Author: Kyrylo Tkachov 
Date:   Tue Oct 11 09:25:54 2016 +0100

[AArch64] Separate shrink wrapping hooks implementation

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 325e725..2d33ef6 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1138,7 +1138,7 @@ aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
 
 /* Emit an insn that's a simple single-set.  Both the operands must be
known to be valid.  */
-inline static rtx
+inline static rtx_insn *
 emit_set_insn (rtx x, rtx y)
 {
   return emit_insn (gen_rtx_SET (x, y));
@@ -3135,6 +3135,9 @@ aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
 	  || regno == cfun->machine->frame.wb_candidate2))
 	continue;
 
+  if (cfun->machine->reg_is_wrapped_separately[regno])
+   continue;
+
   reg = gen_rtx_REG (mode, regno);
   offset = start_offset + cfun->machine->frame.reg_offset[regno];
   mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
@@ -3143,6 +3146,7 @@ aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
   regno2 = aarch64_next_callee_save (regno + 1, limit);
 
   if (regno2 <= limit
+	  && !cfun->machine->reg_is_wrapped_separately[regno2]
 	  && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
 	  == cfun->machine->frame.reg_offset[regno2]))
 
@@ -3191,6 +3195,9 @@ aarch64_restore_callee_saves (machine_mode mode,
regno <= limit;
regno = aarch64_next_callee_save (regno + 1, limit))
 {
+  if (cfun->machine->reg_is_wrapped_separately[regno])
+   continue;
+
   rtx reg, mem;
 
   if (skip_wb
@@ -3205,6 +3212,7 @@ aarch64_restore_callee_saves (machine_mode mode,
   regno2 = aarch64_next_callee_save (regno + 1, limit);
 
  

Re: [WIP C++ PATCH] P0217R3 - C++17 structured bindings

2016-11-14 Thread Richard Biener
On Mon, Nov 14, 2016 at 6:04 AM, Jason Merrill  wrote:
> On Wed, Nov 9, 2016 at 7:24 AM, Jakub Jelinek  wrote:
>> The match.pd hunk is needed, otherwise the generic folding happily folds
>> int arr[2];
>> ...
>> auto [ x, y ] = arr;
>> &x == &arr[0]
>> into 0, because it thinks x and arr are distinct VAR_DECLs.  Though, if
>> such comparisons are required to be folded in constexpr contexts under
>> certain conditions, we'd need to handle the DECL_VALUE_EXPRs in constexpr.c
>> somehow.
>
> What do you think of this approach instead?

get_addr_base_and_unit_offset_1 is infrastructure related to
get_ref_base_and_extent,
get_inner_reference and get_base_address.  All of those should really
behave the same
with respect to the innermost decl.

Thus I don't think we should handle this here.

Richard.


Re: [PATCH 3/9] Introduce emit_status::ensure_regno_capacity

2016-11-14 Thread David Malcolm
On Mon, 2016-11-14 at 15:17 +0100, Bernd Schmidt wrote:
> On 11/11/2016 10:15 PM, David Malcolm wrote:
> > Link to earlier version of the patch:
> >   https://gcc.gnu.org/ml/gcc-patches/2016-10/msg00278.html
> 
> Isn't this the same one?
> 
It is; sorry.  The rest of the patch kit has changed greatly since the
last version; I wanted to post that before the close of stage 1.

I'll work on integrating your earlier comments into the patch today.

Dave


Re: [WIP C++ PATCH] P0217R3 - C++17 structured bindings

2016-11-14 Thread Jakub Jelinek
On Mon, Nov 14, 2016 at 12:04:24AM -0500, Jason Merrill wrote:
> On Wed, Nov 9, 2016 at 7:24 AM, Jakub Jelinek  wrote:
> > The match.pd hunk is needed, otherwise the generic folding happily folds
> > int arr[2];
> > ...
> > auto [ x, y ] = arr;
> > &x == &arr[0]
> > into 0, because it thinks x and arr are distinct VAR_DECLs.  Though, if
> > such comparisons are required to be folded in constexpr contexts under
> > certain conditions, we'd need to handle the DECL_VALUE_EXPRs in constexpr.c
> > somehow.
> 
> What do you think of this approach instead?

As Richard said, we'd need to change the 3 other functions too.
And there is an additional complication: for OpenMP we defer gimplification of
vars with DECL_VALUE_EXPRs on them, because they often get a different
DECL_VALUE_EXPR.  So if optimizers look through DECL_VALUE_EXPR rather than
punt on vars with DECL_VALUE_EXPR, we risk the undesirable value expressions
might leak into the IL.

So I think we want to just punt on vars with DECL_VALUE_EXPR (perhaps even in those
4 functions), except for constexpr.c where we perhaps special case the
decomposition vars or something similar.

Jakub


Re: [PATCH, GCC/ARM] Make arm_feature_set agree with type of FL_* macros

2016-11-14 Thread Kyrill Tkachov


On 14/11/16 14:07, Thomas Preudhomme wrote:

Hi,

Currently arm_feature_set is defined in gcc/config/arm/arm-flags as an array of 2 unsigned long. However, the flags stored in these two entries are (signed) int, being combinations of bits set via expression of the form 1 << bitno. This 
creates 3 issues:


1) undefined behavior when setting the msb (1 << 31)
2) undefined behavior when storing a flag with msb set (negative int) into one 
of the unsigned array entries (positive int)
3) waste of space since the top 32 bits of each entry is not used

This patch changes the definition of FL_* macro to be unsigned int by using the form 
1U << bitno instead and changes the definition of arm_feature_set to be an 
array of 2 unsigned (int) entries.

Bootstrapped on arm-linux-gnueabihf targeting Thumb-2 state.

Is this ok for trunk?



Ok.
Thanks,
Kyrill


Best regards,

Thomas




Re: [PATCH, GCC] Recognize partial load of source expression on big endian targets

2016-11-14 Thread Richard Biener
On Fri, 11 Nov 2016, Thomas Preudhomme wrote:

> Hi,
> 
> To fix PR69714, code was added to disable bswap when the resulting symbolic
> expression (a load or load + byte swap) is smaller than the source expression
> (eg. some of the bytes accessed in the source code gets bitwise ANDed with 0).
> As explained in [1], there were already two pieces of code written
> independently in bswap to deal with that case and it's the interaction of
> the two that caused the bug.
> 
> [1] https://gcc.gnu.org/ml/gcc-patches/2016-02/msg00948.html
> 
> PR69714 proves that this pattern does occur in real code so this patch sets out
> to reenable the optimization and remove the big endian adjustment in
> bswap_replace: the change in find_bswap_or_nop ensures that either we cancel
> the optimization or we don't and there is no need for offset adjustment. As
> explained in [2], the current code only supports loss of bytes at the highest
> addresses because there is no code to adjust the address of the load. However,
> for little and big endian targets the bytes at highest address translate into
> different byte significance in the result. This patch first separates cmpxchg
> and cmpnop adjustment into 2 steps and then deals with endianness correctly
> for the second step.
> 
> [2] https://gcc.gnu.org/ml/gcc-patches/2016-01/msg00119.html
> 
> Ideally we would want to still be able to do the adjustment to deal with load
> or load+bswap at an offset from the byte at lowest memory address accessed but
> this would require more code to recognize it properly for both little endian
> and big endian and will thus have to wait for GCC 8 stage 1.
> 
> ChangeLog entry is as follows:
> 
> *** gcc/ChangeLog ***
> 
> 2016-11-10  Thomas Preud'homme  
> 
> * tree-ssa-math-opts.c (find_bswap_or_nop): Zero out bytes in cmpxchg
> and cmpnop in two steps: first the ones not accessed in original
> gimple
> expression in a endian independent way and then the ones not accessed
> in the final result in an endian-specific way.
> (bswap_replace): Stop doing big endian adjustment.
> 
> 
> Testsuite does not show any regression on an armeb-none-eabi GCC
> cross-compiler targeting ARM Cortex-M3 and on an x86_64-linux-gnu bootstrapped
> native GCC compiler. Bootstrap on powerpc in progress.
> 
> Is this ok for trunk provided that the powerpc bootstrap succeeds?

Ok.

Thanks,
Richard.


Re: [WIP C++ PATCH] P0217R3 - C++17 structured bindings

2016-11-14 Thread Jakub Jelinek
On Sun, Nov 13, 2016 at 11:53:10PM -0500, Jason Merrill wrote:
> On Wed, Nov 9, 2016 at 8:05 AM, Jakub Jelinek  wrote:
> > On Wed, Nov 09, 2016 at 01:24:22PM +0100, Jakub Jelinek wrote:
> >> The following patch is a WIP on P0217R3 - decomposition declarations.
> >> It contains various FIXMEs, Jason, do you think you could finish it up?
> 
> Here's what I'm checking in, as a delta from your patch.  More
> testcases would still be welcome.

Do we want to check this in (tested on x86_64-linux)?  Or are some further
changes needed before that (e.g. has inline, constexpr, extern, static
etc. been allowed for decompositions in Issaquah or not)?

Are you going to update https://gcc.gnu.org/projects/cxx-status.html ?
Seems during the C++ meeting clang added:

Matching template template parameters to compatible arguments   P0522R0
Removing deprecated dynamic exception specificationsP0003R5
Pack expansions in using-declarations   P0195R2

rows to their table too, are you going to add those as well (to the table
and/or GCC 7)?

2016-11-14  Jakub Jelinek  

* c-cppbuiltin.c (c_cpp_builtins): Define __cpp_structured_bindings.

* g++.dg/cpp1z/feat-cxx1z.C: Test __cpp_structured_bindings macro.

--- gcc/c-family/c-cppbuiltin.c.jj  2016-11-09 23:55:12.0 +0100
+++ gcc/c-family/c-cppbuiltin.c 2016-11-14 15:21:56.814759245 +0100
@@ -943,6 +943,7 @@ c_cpp_builtins (cpp_reader *pfile)
  cpp_define (pfile, "__cpp_deduction_guides=201606");
  cpp_define (pfile, "__cpp_noexcept_function_type=201510");
  cpp_define (pfile, "__cpp_template_auto=201606");
+ cpp_define (pfile, "__cpp_structured_bindings=201606");
}
   if (flag_concepts)
cpp_define (pfile, "__cpp_concepts=201507");
--- gcc/testsuite/g++.dg/cpp1z/feat-cxx1z.C.jj  2016-11-09 23:55:14.0 
+0100
+++ gcc/testsuite/g++.dg/cpp1z/feat-cxx1z.C 2016-11-14 15:26:13.459539622 
+0100
@@ -392,6 +392,12 @@
 #  error "__cpp_noexcept_function_type != 201510"
 #endif
 
+#ifndef __cpp_structured_bindings
+#  error "__cpp_structured_bindings"
+#elif __cpp_structured_bindings != 201606
+#  error "__cpp_structured_bindings != 201606"
+#endif
+
 #ifdef __has_cpp_attribute
 
 #  if ! __has_cpp_attribute(maybe_unused)


Jakub


Re: [PATCH] PR fortran/78300 -- class procedure as actual arg

2016-11-14 Thread Steve Kargl
On Mon, Nov 14, 2016 at 12:29:31PM +0100, Janus Weil wrote:
> > After looking into this a little bit more, I found that the culprit
> > seems to be 'resolve_procedure_interface', which does not properly
> > copy the 'class_ok' attribute. I propose the attached patch to fix
> > this (regtesting right now) ...
> 
> The regtest finished successfully. Is that patch ok for trunk?
> 

Yes.  My original patch also regtested without a problem, but
you have a better fix.  You can also commit the patch to 
5 and 6 branch.

-- 
Steve


Re: [PATCH 9/9] Add "__RTL" to cc1 (v4)

2016-11-14 Thread Richard Biener
On Fri, Nov 11, 2016 at 10:15 PM, David Malcolm  wrote:
> Changed in this version:
>
> * Rather than running just one pass, run *all* passes, but start at
>   the given pass; support for "dg-do run" tests that execute the
>   resulting code.
> * Updated test cases to new "compact" dump format; more test cases;
>   use "dg-do run" in various places.
> * Lots of bugfixing
>
> Links to previous versions:
>   https://gcc.gnu.org/ml/gcc-patches/2016-10/msg00263.html
>   https://gcc.gnu.org/ml/gcc-patches/2016-10/msg00500.html

Does running the RTL passes right from the parser work with -fsyntax-only?
Doing it like __GIMPLE has the advantage of not exposing
"rest_of_compilation", etc..

I'm now handling __GIMPLE from within declspecs (the GIMPLE FE stuff
has been committed), it would be nice to match the __RTL piece here.

> gcc/ChangeLog:
> * Makefile.in (OBJS): Add run-rtl-passes.o.
>
> gcc/c-family/ChangeLog:
> * c-common.c (c_common_reswords): Add "__RTL".
> * c-common.h (enum rid): Add RID_RTL.
>
> gcc/c/ChangeLog:
> * c-parser.c: Include "read-rtl-function.h" and
> "run-rtl-passes.h".
> (c_parser_declaration_or_fndef): In the "GNU extensions" part of
> the leading comment, add an alternate production for
> "function-definition", along with new "rtl-body-specifier" and
> "rtl-body-pass-specifier" productions.  Handle "__RTL" by calling
> c_parser_parse_rtl_body.  Convert a timevar_push/pop pair
> to an auto_timevar, to cope with early exit.
> (c_parser_parse_rtl_body): New function.
>
> gcc/ChangeLog:
> * cfg.c (free_original_copy_tables): Remove assertion
> on original_copy_bb_pool.

How can that trigger?

> * cgraph.h (symtab_node::native_rtl_p): New decl.
> * cgraphunit.c (symtab_node::native_rtl_p): New function.
> (symtab_node::needed_p): Don't assert for early assembly output
> for __RTL functions.
> (cgraph_node::finalize_function): Set "force_output" for __RTL
> functions.
> (cgraph_node::analyze): Bail out early for __RTL functions.
> (analyze_functions): Update assertion to support __RTL functions.
> (cgraph_node::expand): Bail out early for __RTL functions.
> * emit-rtl.c (unshare_all_rtl_again): Wrap set_used_decls call
> in check for DECL_INITIAL.

You should simply set DECL_INITIAL of your function decl (make_node (BLOCK);).
There's too much code assuming that is not NULL (and I've fixed quite a bit of
code during stage1 not doing that).

> * final.c (rest_of_clean_state): Don't call delete_tree_ssa for
> _RTL functions.
> * function.h (struct function): Add field "native_RTL".

I wonder if you could simply use ->curr_properties & PROP_rtl?  (and set that
property during parsing, of course)

> * gimple-expr.c (gimple_has_body_p): Return false for __RTL
> functions.
> * pass_manager.h (gcc::pass_manager::get_rest_of_compilation): New
> accessor.
> (gcc::pass_manager::get_clean_slate): New accessor.
> * passes.c: Include "insn-addr.h".
> (execute_one_pass): Implement skipping of passes for functions
> with pass_startwith set.
> * read-md.c (md_reader::read_char): Support filtering
> the input to a subset of line numbers.
> (md_reader::md_reader): Initialize fields
> m_first_line and m_last_line.
> (md_reader::read_file_fragment): New function.
> * read-md.h (md_reader::read_file_fragment): New decl.
> (md_reader::m_first_line): New field.
> (md_reader::int m_last_line): New field.
> * read-rtl-function.c (function_reader::create_function): Only create
> cfun if it doesn't already exist.  Set "native_RTL" on cfun.  Set
> DECL_INITIAL.
> (read_rtl_function_body_from_file_range): New function.
> * read-rtl-function.h (read_rtl_function_body_from_file_range):
> New decl.
> * run-rtl-passes.c: New file.
> * run-rtl-passes.h: New file.
>
> gcc/testsuite/ChangeLog:
> * gcc.dg/rtl/aarch64/asr_div1.c: New file.
> * gcc.dg/rtl/aarch64/pr71779.c: New file.
> * gcc.dg/rtl/rtl.exp: New file.
> * gcc.dg/rtl/test.c: New file.
> * gcc.dg/rtl/unknown-rtx-code.c: New file.
> * gcc.dg/rtl/x86_64/dfinit.c: New file.
> * gcc.dg/rtl/x86_64/different-structs.c: New file.
> * gcc.dg/rtl/x86_64/final.c: New file.
> * gcc.dg/rtl/x86_64/into-cfglayout.c: New file.
> * gcc.dg/rtl/x86_64/ira.c: New file.
> * gcc.dg/rtl/x86_64/pro_and_epilogue.c: New file.
> * gcc.dg/rtl/x86_64/test-return-const.c.after-expand.c: New file.
> * gcc.dg/rtl/x86_64/test-return-const.c.before-fwprop.c: New file.
> * gcc.dg/rtl/x86_64/test-rtl.c: New file.
> * gcc.dg/rtl/x86_64/test_1.h: New file.
> * gcc.dg/rtl/x

Re: [WIP C++ PATCH] P0217R3 - C++17 structured bindings

2016-11-14 Thread Jason Merrill
On Mon, Nov 14, 2016 at 9:51 AM, Jakub Jelinek  wrote:
> On Sun, Nov 13, 2016 at 11:53:10PM -0500, Jason Merrill wrote:
>> On Wed, Nov 9, 2016 at 8:05 AM, Jakub Jelinek  wrote:
>> > On Wed, Nov 09, 2016 at 01:24:22PM +0100, Jakub Jelinek wrote:
>> >> The following patch is a WIP on P0217R3 - decomposition declarations.
>> >> It contains various FIXMEs, Jason, do you think you could finish it up?
>>
>> Here's what I'm checking in, as a delta from your patch.  More
>> testcases would still be welcome.
>
> Do we want to check this in (tested on x86_64-linux)?

Yes, thanks, I keep forgetting the macros.

> Or are some further
> changes needed before that (e.g. has inline, constexpr, extern, static
> etc. been allowed for decompositions in Issaquah or not)?

These haven't been considered yet.

> Are you going to update https://gcc.gnu.org/projects/cxx-status.html ?
> Seems during the C++ meeting clang added:
>
> Matching template template parameters to compatible arguments   P0522R0
> Removing deprecated dynamic exception specifications            P0003R5
> Pack expansions in using-declarations   P0195R2
>
> rows to their table too, are you going to add those as well (to the table
> and/or GCC 7)?

I will.

Jason


Re: [PATCH 00/11] more rtx_insn * stuff

2016-11-14 Thread Trevor Saunders
On Mon, Nov 14, 2016 at 02:45:40PM +0100, Bernd Schmidt wrote:
> On 11/14/2016 09:09 AM, tbsaunde+...@tbsaunde.org wrote:
> > From: Trevor Saunders 
> > 
> > Hi,
> > 
> > Basically $subject which gets rid of a few more casts over all.
> > 
> > I meant to get this out a little while back, but life got busy, and I didn't
> > read the status announcement properly, so virtually working from hawaii for
> > now. patches individually built and regtested on x86_64-linux-gnu, and 
> > series
> > run through config-list.mk, ok?
> 
> Ok for all except #3 and #11.
> 
> For #3, I just don't like increasing indentation like that, I prefer to just
> declare the variable earlier.

ok, fine.

> #11 does unexplained things with dyn_casts (why not as_a?) and templates.

sorry, I really should have explained that.  For some reason we were
missing a is_a_helper specialization to test if a const_rtx is a
rtx_call_insn *, we have ones for things like const_rtx to const
rtx_insn *, but not const rtx_call_insn *.  So I added that which is the
template stuff.  I used dyn_cast because it's the same as if (is_a ())
as_a (), but in this case it's shorter than writing that out.  Of
course I could use is_a and as_a if that seems clearer.

Trev

> 
> Bernd


Re: [PATCH, vec-tails] Support loop epilogue vectorization

2016-11-14 Thread Yuri Rumyantsev
Richard,

I checked one of the tests designed for epilogue vectorization using
patches 1 - 3 and found out that the built compiler performs vectorization
of epilogues with --param vect-epilogues-nomask=1 passed:

$ gcc -Ofast -mavx2 t1.c -S --param vect-epilogues-nomask=1 -o
t1.new-nomask.s -fdump-tree-vect-details
$ grep VECTORIZED -c t1.c.156t.vect
4
 Without param only 2 loops are vectorized.

Should I simply add the subset of tests related to this feature, or must I
also remove all the unnecessary changes?

Thanks.
Yuri.

2016-11-14 16:40 GMT+03:00 Richard Biener :
> On Mon, 14 Nov 2016, Yuri Rumyantsev wrote:
>
>> Richard,
>>
>> In my previous patch I forgot to remove couple lines related to aux field.
>> Here is the correct updated patch.
>
> Yeah, I noticed.  This patch would be ok for trunk (together with
> necessary parts from 1 and 2) if all not required parts are removed
> (and you'd add the testcases covering non-masked tail vect).
>
> Thus, can you please produce a single complete patch containing only
> non-masked epilogue vectorization?
>
> Thanks,
> Richard.
>
>> Thanks.
>> Yuri.
>>
>> 2016-11-14 15:51 GMT+03:00 Richard Biener :
>> > On Fri, 11 Nov 2016, Yuri Rumyantsev wrote:
>> >
>> >> Richard,
>> >>
>> >> I prepare updated 3 patch with passing additional argument to
>> >> vect_analyze_loop as you proposed (untested).
>> >>
>> >> You wrote:
>> >> Btw, I wonder if you can produce a single patch containing just
>> >> epilogue vectorization, that is combine patches 1-3 but rip out
>> >> changes only needed by later patches?
>> >>
>> >> Did you mean that I exclude all support for vectorization epilogues,
>> >> i.e. exclude from 2-nd patch all non-related changes
>> >> like
>> >>
>> >> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
>> >> index 11863af..32011c1 100644
>> >> --- a/gcc/tree-vect-loop.c
>> >> +++ b/gcc/tree-vect-loop.c
>> >> @@ -1120,6 +1120,12 @@ new_loop_vec_info (struct loop *loop)
>> >>LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
>> >>LOOP_VINFO_PEELING_FOR_NITER (res) = false;
>> >>LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
>> >> +  LOOP_VINFO_CAN_BE_MASKED (res) = false;
>> >> +  LOOP_VINFO_REQUIRED_MASKS (res) = 0;
>> >> +  LOOP_VINFO_COMBINE_EPILOGUE (res) = false;
>> >> +  LOOP_VINFO_MASK_EPILOGUE (res) = false;
>> >> +  LOOP_VINFO_NEED_MASKING (res) = false;
>> >> +  LOOP_VINFO_ORIG_LOOP_INFO (res) = NULL;
>> >
>> > Yes.
>> >
>> >> Did you mean also that new combined patch must be working patch, i.e.
>> >> can be integrated without other patches?
>> >
>> > Yes.
>> >
>> >> Could you please look at updated patch?
>> >
>> > Will do.
>> >
>> > Thanks,
>> > Richard.
>> >
>> >> Thanks.
>> >> Yuri.
>> >>
>> >> 2016-11-10 15:36 GMT+03:00 Richard Biener :
>> >> > On Thu, 10 Nov 2016, Richard Biener wrote:
>> >> >
>> >> >> On Tue, 8 Nov 2016, Yuri Rumyantsev wrote:
>> >> >>
>> >> >> > Richard,
>> >> >> >
>> >> >> > Here is updated 3 patch.
>> >> >> >
>> >> >> > I checked that all new tests related to epilogue vectorization 
>> >> >> > passed with it.
>> >> >> >
>> >> >> > Your comments will be appreciated.
>> >> >>
>> >> >> A lot better now.  Instead of the ->aux dance I now prefer to
>> >> >> pass the original loops loop_vinfo to vect_analyze_loop as
>> >> >> optional argument (if non-NULL we analyze the epilogue of that
>> >> >> loop_vinfo).  OTOH I remember we mainly use it to get at the
>> >> >> original vectorization factor?  So we can pass down an (optional)
>> >> >> forced vectorization factor as well?
>> >> >
>> >> > Btw, I wonder if you can produce a single patch containing just
>> >> > epilogue vectorization, that is combine patches 1-3 but rip out
>> >> > changes only needed by later patches?
>> >> >
>> >> > Thanks,
>> >> > Richard.
>> >> >
>> >> >> Richard.
>> >> >>
>> >> >> > 2016-11-08 15:38 GMT+03:00 Richard Biener :
>> >> >> > > On Thu, 3 Nov 2016, Yuri Rumyantsev wrote:
>> >> >> > >
>> >> >> > >> Hi Richard,
>> >> >> > >>
>> >> >> > >> I did not understand your last remark:
>> >> >> > >>
>> >> >> > >> > That is, here (and avoid the FOR_EACH_LOOP change):
>> >> >> > >> >
>> >> >> > >> > @@ -580,12 +586,21 @@ vectorize_loops (void)
>> >> >> > >> >   && dump_enabled_p ())
>> >> >> > >> >   dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, 
>> >> >> > >> > vect_location,
>> >> >> > >> >"loop vectorized\n");
>> >> >> > >> > -   vect_transform_loop (loop_vinfo);
>> >> >> > >> > +   new_loop = vect_transform_loop (loop_vinfo);
>> >> >> > >> > num_vectorized_loops++;
>> >> >> > >> >/* Now that the loop has been vectorized, allow it to be 
>> >> >> > >> > unrolled
>> >> >> > >> >   etc.  */
>> >> >> > >> >  loop->force_vectorize = false;
>> >> >> > >> >
>> >> >> > >> > +   /* Add new loop to a processing queue.  To make it 
>> >> >> > >> > easier
>> >> >> > >> > +  to match loop and its epilogue vectorization in dumps
>> >> >> > >> > +  put new loop as the next loop to proc

[PATCH] Add map clauses to libgomp test device-3.f90

2016-11-14 Thread Martin Jambor
Hi,

yesterday I forgot to send out the following patch.  The test
libgomp/testsuite/libgomp.fortran/examples-4/device-3.f90 was failing
for me when I was testing the HSA branch merge but I believe the test
itself is wrong and the failure is due to us now adhering to OpenMP
4.5 default mapping of scalars (i.e. firstprivate, as opposed to
tofrom in 4.0) and the test itself needs to be fixed in the following
way.

OK for trunk?  Thanks,

Martin


2016-11-11  Martin Jambor  

* device-3.f90 (e_57_3): Add a mapping clause to target construct.

diff --git a/libgomp/testsuite/libgomp.fortran/examples-4/device-3.f90 
b/libgomp/testsuite/libgomp.fortran/examples-4/device-3.f90
index a29f1b5..95d9f44 100644
--- a/libgomp/testsuite/libgomp.fortran/examples-4/device-3.f90
+++ b/libgomp/testsuite/libgomp.fortran/examples-4/device-3.f90
@@ -8,13 +8,13 @@ program e_57_3
   integer :: default_device
 
   default_device = omp_get_default_device ()
-  !$omp target
+  !$omp target map(from:res)
 res = omp_is_initial_device ()
   !$omp end target
   if (res) call abort
 
   call omp_set_default_device (omp_get_num_devices ())
-  !$omp target
+  !$omp target map(from:res)
 res = omp_is_initial_device ()
   !$omp end target
   if (.not. res) call abort


[PATCH, i386]: Merge some operand constraints

2016-11-14 Thread Uros Bizjak
No functional changes.

2016-11-14  Uros Bizjak  

* config/i386/i386.md (*andndi3_doubleword): Merge operand constraints.
(*ashl3_doubleword): Ditto.

Bootstrapped and regression tested on x86_64-linux-gnu, committed to
mainline SVN.

Uros.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ac2650b..7db04ce 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -8745,10 +8745,10 @@
 })
 
 (define_insn_and_split "*andndi3_doubleword"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
+  [(set (match_operand:DI 0 "register_operand" "=r")
(and:DI
- (not:DI (match_operand:DI 1 "register_operand" "r,r"))
- (match_operand:DI 2 "nonimmediate_operand" "r,m")))
+ (not:DI (match_operand:DI 1 "register_operand" "r"))
+ (match_operand:DI 2 "nonimmediate_operand" "rm")))
(clobber (reg:CC FLAGS_REG))]
   "TARGET_BMI && !TARGET_64BIT && TARGET_STV && TARGET_SSE"
   "#"
@@ -9704,9 +9704,9 @@
   "ix86_expand_binary_operator (ASHIFT, mode, operands); DONE;")
 
 (define_insn "*ashl3_doubleword"
-  [(set (match_operand:DWI 0 "register_operand" "=&r,&r")
-   (ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "n,0")
-   (match_operand:QI 2 "nonmemory_operand" "c,c")))
+  [(set (match_operand:DWI 0 "register_operand" "=&r")
+   (ashift:DWI (match_operand:DWI 1 "reg_or_pm1_operand" "0n")
+   (match_operand:QI 2 "nonmemory_operand" "c")))
(clobber (reg:CC FLAGS_REG))]
   ""
   "#"


[PATCH] fix PR sanitizer/78267

2016-11-14 Thread Jack Howarth
The attached patch fixes PR sanitizer/78267 by conditionalizing the
include of <os/trace.h> on the compiler defining __BLOCKS__ as a
supported extension. Passes bootstrap on x86_64-apple-darwin15. Okay
for gcc trunk?
2016-11-14  Jack Howarth  

libsanitizer/

PR sanitizer/78267
* sanitizer_common/sanitizer_mac.cc: Include  only if
compiler supports blocks extension.


Index: libsanitizer/sanitizer_common/sanitizer_mac.cc
===
--- libsanitizer/sanitizer_common/sanitizer_mac.cc  (revision 242387)
+++ libsanitizer/sanitizer_common/sanitizer_mac.cc  (working copy)
@@ -34,7 +34,7 @@
 extern char **environ;
 #endif
 
-#if defined(__has_include) && __has_include()
+#if defined(__has_include) && __has_include() && 
defined(__BLOCKS__)
 #define SANITIZER_OS_TRACE 1
 #include 
 #else


Re: [PATCH] fix PR sanitizer/78267

2016-11-14 Thread Iain Sandoe

> On 14 Nov 2016, at 16:57, Jack Howarth  wrote:
> 
> The attached patch fixes PR sanitizer/78267 by conditionalizing the
> include of  on the compiler defining __BLOCKS__ as a
> supported extension. Passes bootstrap on x86_64-apple-darwin15. Okay
> for gcc trunk?
> 

Rainer has that PR assigned and is making a proper fix (which will prevent 
other failures on the port), unless he decides to punt on it, I think we should 
wait and get a proper fix,

Iain



Re: [PATCH, GCC/ARM] Make arm_feature_set agree with type of FL_* macros

2016-11-14 Thread Christophe Lyon
Hi,


On 14 November 2016 at 15:07, Thomas Preudhomme
 wrote:
> Hi,
>
> Currently arm_feature_set is defined in gcc/config/arm/arm-flags as an array
> of 2 unsigned long. However, the flags stored in these two entries are
> (signed) int, being combinations of bits set via expression of the form 1 <<
> bitno. This creates 3 issues:
>
> 1) undefined behavior when setting the msb (1 << 31)
> 2) undefined behavior when storing a flag with msb set (negative int) into
> one of the unsigned array entries (positive int)

Just curious: are these problems seen when building with ubsan enabled?

> 3) waste of space since the top 32 bits of each entry is not used
>
> This patch changes the definition of FL_* macro to be unsigned int by using
> the form 1U << bitno instead and changes the definition of arm_feature_set
> to be an array of 2 unsigned (int) entries.
>
> Bootstrapped on arm-linux-gnueabihf targeting Thumb-2 state.
>
> Is this ok for trunk?
>
> Best regards,
>
> Thomas


[PATCH][PPC] Fix ICE using power9 with soft-float

2016-11-14 Thread Andrew Stubbs
The testcase powerpc/fusion3.c causes an ICE when compiled with 
-msoft-float.


The key line in the testcase looks fairly harmless:

   void fusion_float_write (float *p, float f){ p[LARGE] = f; }

The error message looks like this:

.../gcc.target/powerpc/fusion3.c: In function 'fusion_float_write':^M
.../gcc.target/powerpc/fusion3.c:12:1: error: unrecognizable insn:^M
(insn 18 4 14 2 (parallel [^M
(set (mem:SF (plus:SI (plus:SI (reg:SI 3 3 [ p ])^M
(const_int 327680 [0x5]))^M
(const_int -29420 [0x8d14])) [1 
MEM[(float *)p_1(D) + 298260B]+0 S4 A32])^M

(unspec:SF [^M
(reg:SF 4 4 [ f ])^M
] UNSPEC_FUSION_P9))^M
(clobber (reg/f:SI 3 3 [157]))^M
]) 
"/scratch/astubbs/fsf/src/gcc-mainline/gcc/testsuite/gcc.target/powerpc/fusion3.c":12 
-1^M

 (nil))^M

Basically, the problem is that the peephole optimization tries to create 
a Power9 Fusion instruction, but those do not support SF values in 
integer registers (AFAICT).


So, presumably, I need to adjust either the predicate or the condition 
of the peephole rules.


The predicate used is "toc_fusion_or_p9_reg_operand", and this might be 
the root cause, but I don't know the architecture well enough to be 
sure. The predicate code seems to suggest that "toc_fusion", whatever 
that is, should be able to do this, but the insn produced by the 
peephole uses only UNSPEC_FUSION_P9, which does not. Perhaps this 
predicate is inappropriate for the P9 Fusion peephole, or perhaps it 
needs to be taught about this corner case?


In any case, I don't want to change the predicate without being sure 
what it does (here and elsewhere), so the attached patch solves the 
problem by changing the condition.


Is this OK, or do I need to do something less blunt?

Thanks

Andrew
2016-11-11  Andrew Stubbs  

	gcc/
	* config/rs6000/rs6000.md: Disable P9-fusion peepholes when
	TARGET_SOFT_FLOAT is set.

	gcc/testsuite/
	* gcc.target/powerpc/fusion3.c: Skip on -msoft-float.

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index de959c9..28d8174 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -13024,7 +13024,8 @@
(set (match_operand:SFDF 2 "toc_fusion_or_p9_reg_operand" "")
 	(match_operand:SFDF 3 "fusion_offsettable_mem_operand" ""))]
   "TARGET_P9_FUSION && peep2_reg_dead_p (2, operands[0])
-   && fusion_p9_p (operands[0], operands[1], operands[2], operands[3])"
+   && fusion_p9_p (operands[0], operands[1], operands[2], operands[3])
+   && !TARGET_SOFT_FLOAT"
   [(const_int 0)]
 {
   expand_fusion_p9_load (operands);
@@ -13037,7 +13038,8 @@
(set (match_operand:SFDF 2 "offsettable_mem_operand" "")
 	(match_operand:SFDF 3 "toc_fusion_or_p9_reg_operand" ""))]
   "TARGET_P9_FUSION && peep2_reg_dead_p (2, operands[0])
-   && fusion_p9_p (operands[0], operands[1], operands[2], operands[3])"
+   && fusion_p9_p (operands[0], operands[1], operands[2], operands[3])
+   && !TARGET_SOFT_FLOAT"
   [(const_int 0)]
 {
   expand_fusion_p9_store (operands);
@@ -13050,7 +13052,8 @@
(set (match_dup 0)
 	(ior:SDI (match_dup 0)
 		 (match_operand:SDI 2 "u_short_cint_operand" "")))]
-  "TARGET_P9_FUSION"
+  "TARGET_P9_FUSION
+   && !TARGET_SOFT_FLOAT"
   [(set (match_dup 0)
 	(unspec:SDI [(match_dup 1)
 		 (match_dup 2)] UNSPEC_FUSION_P9))])
@@ -13063,7 +13066,8 @@
 		 (match_operand:SDI 3 "u_short_cint_operand" "")))]
   "TARGET_P9_FUSION
&& !rtx_equal_p (operands[0], operands[2])
-   && peep2_reg_dead_p (2, operands[0])"
+   && peep2_reg_dead_p (2, operands[0])
+   && !TARGET_SOFT_FLOAT"
   [(set (match_dup 2)
 	(unspec:SDI [(match_dup 1)
 		 (match_dup 3)] UNSPEC_FUSION_P9))])
diff --git a/gcc/testsuite/gcc.target/powerpc/fusion3.c b/gcc/testsuite/gcc.target/powerpc/fusion3.c
index 8eca640..20992d0 100644
--- a/gcc/testsuite/gcc.target/powerpc/fusion3.c
+++ b/gcc/testsuite/gcc.target/powerpc/fusion3.c
@@ -2,6 +2,7 @@
 /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
 /* { dg-require-effective-target powerpc_p9vector_ok } */
 /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */
+/* { dg-skip-if "-mpower9-fusion and -msoft-float are incompatible" { powerpc*-*-* } { "-msoft-float" } {} } */
 /* { dg-options "-mcpu=power7 -mtune=power9 -O3" } */
 
 #define LARGE 0x12345


Re: [PATCH] Add map clauses to libgomp test device-3.f90

2016-11-14 Thread Alexander Monakov


On Mon, 14 Nov 2016, Martin Jambor wrote:

> Hi,
> 
> yesterday I forgot to send out the following patch.  The test
> libgomp/testsuite/libgomp.fortran/examples-4/device-3.f90 was failing
> for me when I was testing the HSA branch merge but I believe the test
> itself is wrong and the failure is due to us now adhering to OpenMP
> 4.5 default mapping of scalars (i.e. firstprivate, as opposed to
> tofrom in 4.0) and the test itself needs to be fixed in the following
> way.

From inspection, I believe device-1.f90 in the same directory has the same
issue?  Here's the corresponding patch.

I also think there's the same issue in target-3.f90 and target5.f90
(compile-only test).

diff --git a/libgomp/testsuite/libgomp.fortran/examples-4/device-1.f90 
b/libgomp/testsuite/libgomp.fortran/examples-4/device-1.f90
index a411db4..3404f01 100644
--- a/libgomp/testsuite/libgomp.fortran/examples-4/device-1.f90
+++ b/libgomp/testsuite/libgomp.fortran/examples-4/device-1.f90
@@ -9,11 +9,11 @@ program e_57_1
   a = 100
   b = 0

-  !$omp target if(a > 200 .and. a < 400)
+  !$omp target map(from: c) if(a > 200 .and. a < 400)
 c = omp_is_initial_device ()
   !$omp end target

-  !$omp target data map(to: b) if(a > 200 .and. a < 400)
+  !$omp target data map(to: b) map(from: d) if(a > 200 .and. a < 400)
 !$omp target
   b = 100
   d = omp_is_initial_device ()
@@ -25,11 +25,11 @@ program e_57_1
   a = a + 200
   b = 0

-  !$omp target if(a > 200 .and. a < 400)
+  !$omp target map(from: c) if(a > 200 .and. a < 400)
 c = omp_is_initial_device ()
   !$omp end target

-  !$omp target data map(to: b) if(a > 200 .and. a < 400)
+  !$omp target data map(to: b) map(from: d) if(a > 200 .and. a < 400)
 !$omp target
   b = 100
   d = omp_is_initial_device ()
@@ -41,11 +41,11 @@ program e_57_1
   a = a + 200
   b = 0

-  !$omp target if(a > 200 .and. a < 400)
+  !$omp target map(from: c) if(a > 200 .and. a < 400)
 c = omp_is_initial_device ()
   !$omp end target

-  !$omp target data map(to: b) if(a > 200 .and. a < 400)
+  !$omp target data map(to: b) map(from: d) if(a > 200 .and. a < 400)
 !$omp target
   b = 100
   d = omp_is_initial_device ()



Re: [PATCH] PR fortran/78300 -- class procedure as actual arg

2016-11-14 Thread Janus Weil
2016-11-14 16:10 GMT+01:00 Steve Kargl :
> On Mon, Nov 14, 2016 at 12:29:31PM +0100, Janus Weil wrote:
>> > After looking into this a little bit more, I found that the culprit
>> > seems to be 'resolve_procedure_interface', which does not properly
>> > copy the 'class_ok' attribute. I propose the attached patch to fix
>> > this (regtesting right now) ...
>>
>> The regtest finished successfully. Is that patch ok for trunk?
>>
>
> Yes.  My original patch also regtest without a problem, but
> you have a better fix.  You can also commit the patch to
> 5 and 6 branch.

Thanks, I have committed the patch to trunk as r242392 now:

https://gcc.gnu.org/viewcvs?rev=242392&root=gcc&view=rev

Since it's not a regression, I don't care strongly about backporting.
If you think it's necessary, feel free to do it ...

Cheers,
Janus


Re: [PATCH] PR fortran/78300 -- class procedure as actual arg

2016-11-14 Thread Steve Kargl
On Mon, Nov 14, 2016 at 06:21:06PM +0100, Janus Weil wrote:
> 2016-11-14 16:10 GMT+01:00 Steve Kargl :
> > On Mon, Nov 14, 2016 at 12:29:31PM +0100, Janus Weil wrote:
> >> > After looking into this a little bit more, I found that the culprit
> >> > seems to be 'resolve_procedure_interface', which does not properly
> >> > copy the 'class_ok' attribute. I propose the attached patch to fix
> >> > this (regtesting right now) ...
> >>
> >> The regtest finished successfully. Is that patch ok for trunk?
> >>
> >
> > Yes.  My original patch also regtest without a problem, but
> > you have a better fix.  You can also commit the patch to
> > 5 and 6 branch.
> 
> Thanks, I have committed the patch to trunk as r242392 now:
> 
> https://gcc.gnu.org/viewcvs?rev=242392&root=gcc&view=rev
> 
> Since it's not a regression, I don't care strongly about backporting.
> If you think it's necessary, feel free to do it ...
> 

Understood.  For small patches, I typically try to backport
to at least the 6-branch as a pre-emptive measure.  My
reasoning is that gfortran users will likely use an updated
6.x compiler before trying the newest release.  Thanks for
the fix.

-- 
Steve


Re: [libstdc++, testsuite] Add dg-require-thread-fence

2016-11-14 Thread Mike Stump
On Oct 21, 2016, at 1:00 AM, Christophe Lyon  wrote:
> 
> So if we say that the current behaviour has to keep being the default,
> so that users think about what they are really doing, 

Having a toolchain not work by default to force users to think, isn't a winning 
strategy.

Everything should always, just work.  Those things that don't, we should fix.



Re: [PATCH, vec-tails] Support loop epilogue vectorization

2016-11-14 Thread Richard Biener
On November 14, 2016 4:39:40 PM GMT+01:00, Yuri Rumyantsev  
wrote:
>Richard,
>
>I checked one of the tests designed for epilogue vectorization using
>patches 1 - 3 and found out that build compiler performs vectorization
>of epilogues with --param vect-epilogues-nomask=1 passed:
>
>$ gcc -Ofast -mavx2 t1.c -S --param vect-epilogues-nomask=1 -o
>t1.new-nomask.s -fdump-tree-vect-details
>$ grep VECTORIZED -c t1.c.156t.vect
>4
> Without param only 2 loops are vectorized.
>
>Should I simply add a part of tests related to this feature or I must
>delete all not necessary changes also?

Please remove all not necessary changes.

Richard.

>Thanks.
>Yuri.
>
>2016-11-14 16:40 GMT+03:00 Richard Biener :
>> On Mon, 14 Nov 2016, Yuri Rumyantsev wrote:
>>
>>> Richard,
>>>
>>> In my previous patch I forgot to remove couple lines related to aux
>field.
>>> Here is the correct updated patch.
>>
>> Yeah, I noticed.  This patch would be ok for trunk (together with
>> necessary parts from 1 and 2) if all not required parts are removed
>> (and you'd add the testcases covering non-masked tail vect).
>>
>> Thus, can you please produce a single complete patch containing only
>> non-masked epilogue vectoriziation?
>>
>> Thanks,
>> Richard.
>>
>>> Thanks.
>>> Yuri.
>>>
>>> 2016-11-14 15:51 GMT+03:00 Richard Biener :
>>> > On Fri, 11 Nov 2016, Yuri Rumyantsev wrote:
>>> >
>>> >> Richard,
>>> >>
>>> >> I prepare updated 3 patch with passing additional argument to
>>> >> vect_analyze_loop as you proposed (untested).
>>> >>
>>> >> You wrote:
>>> >> tw, I wonder if you can produce a single patch containing just
>>> >> epilogue vectorization, that is combine patches 1-3 but rip out
>>> >> changes only needed by later patches?
>>> >>
>>> >> Did you mean that I exclude all support for vectorization
>epilogues,
>>> >> i.e. exclude from 2-nd patch all non-related changes
>>> >> like
>>> >>
>>> >> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
>>> >> index 11863af..32011c1 100644
>>> >> --- a/gcc/tree-vect-loop.c
>>> >> +++ b/gcc/tree-vect-loop.c
>>> >> @@ -1120,6 +1120,12 @@ new_loop_vec_info (struct loop *loop)
>>> >>LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
>>> >>LOOP_VINFO_PEELING_FOR_NITER (res) = false;
>>> >>LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
>>> >> +  LOOP_VINFO_CAN_BE_MASKED (res) = false;
>>> >> +  LOOP_VINFO_REQUIRED_MASKS (res) = 0;
>>> >> +  LOOP_VINFO_COMBINE_EPILOGUE (res) = false;
>>> >> +  LOOP_VINFO_MASK_EPILOGUE (res) = false;
>>> >> +  LOOP_VINFO_NEED_MASKING (res) = false;
>>> >> +  LOOP_VINFO_ORIG_LOOP_INFO (res) = NULL;
>>> >
>>> > Yes.
>>> >
>>> >> Did you mean also that new combined patch must be working patch,
>i.e.
>>> >> can be integrated without other patches?
>>> >
>>> > Yes.
>>> >
>>> >> Could you please look at updated patch?
>>> >
>>> > Will do.
>>> >
>>> > Thanks,
>>> > Richard.
>>> >
>>> >> Thanks.
>>> >> Yuri.
>>> >>
>>> >> 2016-11-10 15:36 GMT+03:00 Richard Biener :
>>> >> > On Thu, 10 Nov 2016, Richard Biener wrote:
>>> >> >
>>> >> >> On Tue, 8 Nov 2016, Yuri Rumyantsev wrote:
>>> >> >>
>>> >> >> > Richard,
>>> >> >> >
>>> >> >> > Here is updated 3 patch.
>>> >> >> >
>>> >> >> > I checked that all new tests related to epilogue
>vectorization passed with it.
>>> >> >> >
>>> >> >> > Your comments will be appreciated.
>>> >> >>
>>> >> >> A lot better now.  Instead of the ->aux dance I now prefer to
>>> >> >> pass the original loops loop_vinfo to vect_analyze_loop as
>>> >> >> optional argument (if non-NULL we analyze the epilogue of that
>>> >> >> loop_vinfo).  OTOH I remember we mainly use it to get at the
>>> >> >> original vectorization factor?  So we can pass down an
>(optional)
>>> >> >> forced vectorization factor as well?
>>> >> >
>>> >> > Btw, I wonder if you can produce a single patch containing just
>>> >> > epilogue vectorization, that is combine patches 1-3 but rip out
>>> >> > changes only needed by later patches?
>>> >> >
>>> >> > Thanks,
>>> >> > Richard.
>>> >> >
>>> >> >> Richard.
>>> >> >>
>>> >> >> > 2016-11-08 15:38 GMT+03:00 Richard Biener
>:
>>> >> >> > > On Thu, 3 Nov 2016, Yuri Rumyantsev wrote:
>>> >> >> > >
>>> >> >> > >> Hi Richard,
>>> >> >> > >>
>>> >> >> > >> I did not understand your last remark:
>>> >> >> > >>
>>> >> >> > >> > That is, here (and avoid the FOR_EACH_LOOP change):
>>> >> >> > >> >
>>> >> >> > >> > @@ -580,12 +586,21 @@ vectorize_loops (void)
>>> >> >> > >> >   && dump_enabled_p ())
>>> >> >> > >> >   dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
>vect_location,
>>> >> >> > >> >"loop vectorized\n");
>>> >> >> > >> > -   vect_transform_loop (loop_vinfo);
>>> >> >> > >> > +   new_loop = vect_transform_loop (loop_vinfo);
>>> >> >> > >> > num_vectorized_loops++;
>>> >> >> > >> >/* Now that the loop has been vectorized, allow
>it to be unrolled
>>> >> >> > >> >   etc.  */
>>> >> >> > >> >  loop->force_vectorize = false;
>>> >> >> > >> >
>>> >> >> > >> > +   /

Re: [PATCH] fix PR sanitizer/78267

2016-11-14 Thread Mike Stump
On Nov 14, 2016, at 8:59 AM, Iain Sandoe  wrote:
> 
>> On 14 Nov 2016, at 16:57, Jack Howarth  wrote:
>> 
>> The attached patch fixes PR sanitizer/78267 by conditionalizing the
>> include of  on the compiler defining __BLOCKS__ as a
>> supported extension. Passes bootstrap on x86_64-apple-darwin15. Okay
>> for gcc trunk?
>> 
> 
> Rainer has that PR assigned and is making a proper fix (which will prevent 
> other failures on the port), unless he decides to punt on it, I think we 
> should wait and get a proper fix,

I'm fine with waiting if Jack wants, but the danger is leaving things in a 
non-working state for too long, and things can pile up.  I'd rather put in the 
fix to the build, now, and, if technology advances to allow that code to be 
removed later, it always can be.  If people are to wait, it is better to know 
how long to wait.  A week, a month, a year?  Then, when that time passes, they 
know to ask again.  How long should we wait?



Re: [PATCH] fix PR sanitizer/78267

2016-11-14 Thread Rainer Orth
Hi Mike,

> On Nov 14, 2016, at 8:59 AM, Iain Sandoe  wrote:
>> 
>>> On 14 Nov 2016, at 16:57, Jack Howarth  wrote:
>>> 
>>> The attached patch fixes PR sanitizer/78267 by conditionalizing the
>>> include of  on the compiler defining __BLOCKS__ as a
>>> supported extension. Passes bootstrap on x86_64-apple-darwin15. Okay
>>> for gcc trunk?
>>> 
>> 
>> Rainer has that PR assigned and is making a proper fix (which will
>> prevent other failures on the port), unless he decides to punt on it, I
>> think we should wait and get a proper fix,
>
> I'm fine with waiting if Jack wants, but the danger is leaving things in a
> non-working state for too long, and things can pile up.  I'd rather put in
> the fix to the build, now, and, if technology advances to allow that code
> to be removed later, it always can be.  If people are to wait, it is better
> to know how long to wait.  A week, a month, a year?  Then, when that time
> passes, they know to ask again.  How long should we wait?

my patch has been verified to work on 10.12.  The revised patch has been
developed with a copy of the affected 10.11 headers, but needs to be
tested on the real thing.  Iain has provided me with the affected 10.10
headers and I expect to have an augmented patch ready today or tomorrow.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


[patch, fortran, committed] Allow dumping of code in Fortran syntax tree

2016-11-14 Thread Thomas Koenig

Hello world,

I have just committed the attached patch as obvious.  No test case,
the function is only supposed to be called when debugging the
compiler itself.

Regards

Thomas

2016-11-14  Thomas Koenig  

* dump-parse-tree.c (show_code):  Add prototype.
(gfc_debug_code):  New function.
(show_code_node):  Add space after SELECT TYPE.

Index: dump-parse-tree.c
===
--- dump-parse-tree.c	(Revision 242335)
+++ dump-parse-tree.c	(Arbeitskopie)
@@ -47,6 +47,7 @@ static FILE *dumpfile;
 static void show_expr (gfc_expr *p);
 static void show_code_node (int, gfc_code *);
 static void show_namespace (gfc_namespace *ns);
+static void show_code (int, gfc_code *);
 
 
 /* Allow dumping of an expression in the debugger.  */
@@ -62,7 +63,19 @@ gfc_debug_expr (gfc_expr *e)
   dumpfile = tmp;
 }
 
+/* Allow for dumping of a piece of code in the debugger.  */
+void gfc_debug_code (gfc_code *c);
 
+void
+gfc_debug_code (gfc_code *c)
+{
+  FILE *tmp = dumpfile;
+  dumpfile = stderr;
+  show_code (1, c);
+  fputc ('\n', dumpfile);
+  dumpfile = tmp;
+}
+
 /* Do indentation for a specific level.  */
 
 static inline void
@@ -1987,7 +2000,7 @@ show_code_node (int level, gfc_code *c)
 case EXEC_SELECT_TYPE:
   d = c->block;
   if (c->op == EXEC_SELECT_TYPE)
-	fputs ("SELECT TYPE", dumpfile);
+	fputs ("SELECT TYPE ", dumpfile);
   else
 	fputs ("SELECT CASE ", dumpfile);
   show_expr (c->expr1);


Re: [PATCH] Fix -Wshadow warning in libiberty/cp-demangle.c

2016-11-14 Thread Ian Lance Taylor
On Sun, Nov 6, 2016 at 10:03 AM, Mark Wielaard  wrote:
>
> We now have -Wshadow=local. So the attached patch uses that for
> libiberty. Is the attached patch OK to commit?

This is OK.

Thanks.

Ian


Re: [PATCH] Enable Intel AVX512_4FMAPS and AVX512_4VNNIW instructions

2016-11-14 Thread Andrew Senkevich
2016-11-11 14:16 GMT+03:00 Uros Bizjak :
> The x86 part of the patch is OK with the above changes and additional
> target attribute test for flags2 ISA features..

Fixed according to your comments; I will follow up with additional tests soon.


--
WBR,
Andrew


new_avx512_instructions_14.11.patch
Description: Binary data


Re: [PATCH] Enable Intel AVX512_4FMAPS and AVX512_4VNNIW instructions

2016-11-14 Thread Andrew Senkevich
2016-11-11 14:29 GMT+03:00 Jakub Jelinek :
> Hi!
>
> I've noticed preexisting:
>
> On Thu, Nov 10, 2016 at 07:27:00PM +0300, Andrew Senkevich wrote:
>
>> --- a/gcc/config/i386/i386-modes.def
>> +++ b/gcc/config/i386/i386-modes.def
>> @@ -84,6 +84,7 @@ VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
>>  VECTOR_MODES (FLOAT, 32); /*V16HF V8SF V4DF */
>>  VECTOR_MODES (FLOAT, 64); /*   V32HF V16SF V8DF */
>>  VECTOR_MODES (FLOAT, 128);/*  V64HF V32SF V16DF */
>
> The VECTOR_MODES (FLOAT, comments don't really match reality, shall we fix
> that?  None of them create V*HF mode, but they do create V*TF mode.

I have fixed it in new patch.


--
WBR,
Andrew


Re: [PATCH, GCC/ARM] Make arm_feature_set agree with type of FL_* macros

2016-11-14 Thread Thomas Preudhomme

Hi Christophe,

No, they were seen when bootstrapping on arm-linux-gnueabihf with the patch to
make -mthumb optional. The patch stores flags in an arm_feature_set array and
GCC -Wnarrowing complained about the difference in type.


Best regards,

Thomas

On 14/11/16 17:00, Christophe Lyon wrote:

Hi,


On 14 November 2016 at 15:07, Thomas Preudhomme
 wrote:

Hi,

Currently arm_feature_set is defined in gcc/config/arm/arm-flags as an array
of 2 unsigned long. However, the flags stored in these two entries are
(signed) int, being combinations of bits set via expression of the form 1 <<
bitno. This creates 3 issues:

1) undefined behavior when setting the msb (1 << 31)
2) undefined behavior when storing a flag with msb set (negative int) into
one of the unsigned array entries (positive int)


Just curious: are these problems seen when building with ubsan enabled?


3) waste of space since the top 32 bits of each entry is not used

This patch changes the definition of FL_* macro to be unsigned int by using
the form 1U << bitno instead and changes the definition of arm_feature_set
to be an array of 2 unsigned (int) entries.

Bootstrapped on arm-linux-gnueabihf targeting Thumb-2 state.

Is this ok for trunk?

Best regards,

Thomas


Re: [fixincludes] Fix macOS 10.12 and (PR sanitizer/78267)

2016-11-14 Thread Jack Howarth
Rainer,
Unfortunately this permutation still fails to bootstrap on darwin15...

libtool: compile:
/sw/src/fink.build/gcc7-7.0.0-1/darwin_objdir/./gcc/xgcc
-shared-libgcc -B/sw/src/fink.build/gcc7-7.0.0-1/darwin_objdir/./gcc
-nostdinc++ 
-L/sw/src/fink.build/gcc7-7.0.0-1/darwin_objdir/x86_64-apple-darwin15.6.0/libstdc++-v3/src
-L/sw/src/fink.build/gcc7-7.0.0-1/darwin_objdir/x86_64-apple-darwin15.6.0/libstdc++-v3/src/.libs
-L/sw/src/fink.build/gcc7-7.0.0-1/darwin_objdir/x86_64-apple-darwin15.6.0/libstdc++-v3/libsupc++/.libs
-B/sw/lib/gcc7/x86_64-apple-darwin15.6.0/bin/
-B/sw/lib/gcc7/x86_64-apple-darwin15.6.0/lib/ -isystem
/sw/lib/gcc7/x86_64-apple-darwin15.6.0/include -isystem
/sw/lib/gcc7/x86_64-apple-darwin15.6.0/sys-include -D_GNU_SOURCE
-D_DEBUG -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS
-D__STDC_LIMIT_MACROS -DHAVE_RPC_XDR_H=0 -DHAVE_TIRPC_RPC_XDR_H=0 -I.
-I../../../../gcc-7-20161114/libsanitizer/sanitizer_common -I.. -I
../../../../gcc-7-20161114/libsanitizer/include -isystem
../../../../gcc-7-20161114/libsanitizer/include/system -Wall -W
-Wno-unused-parameter -Wwrite-strings -pedantic -Wno-long-long -fPIC
-fno-builtin -fno-exceptions -fno-rtti -fomit-frame-pointer
-funwind-tables -fvisibility=hidden -Wno-variadic-macros
-I../../libstdc++-v3/include
-I../../libstdc++-v3/include/x86_64-apple-darwin15.6.0
-I../../../../gcc-7-20161114/libsanitizer/../libstdc++-v3/libsupc++
-std=gnu++11 -g -O2 -MT sanitizer_mac.lo -MD -MP -MF
.deps/sanitizer_mac.Tpo -c
../../../../gcc-7-20161114/libsanitizer/sanitizer_common/sanitizer_mac.cc
 -fno-common -DPIC -o .libs/sanitizer_mac.o
../../../../gcc-7-20161114/libsanitizer/sanitizer_common/sanitizer_mac.cc:497:0:
warning: ignoring #pragma clang diagnostic [-Wunknown-pragmas]
   os_trace("Address Sanitizer reported a failure.");

../../../../gcc-7-20161114/libsanitizer/sanitizer_common/sanitizer_mac.cc:497:0:
warning: ignoring #pragma clang diagnostic [-Wunknown-pragmas]
../../../../gcc-7-20161114/libsanitizer/sanitizer_common/sanitizer_mac.cc:497:0:
warning: ignoring #pragma clang diagnostic [-Wunknown-pragmas]
../../../../gcc-7-20161114/libsanitizer/sanitizer_common/sanitizer_mac.cc:500:0:
warning: ignoring #pragma clang diagnostic [-Wunknown-pragmas]
   os_trace("Undefined Behavior Sanitizer reported a failure.");

../../../../gcc-7-20161114/libsanitizer/sanitizer_common/sanitizer_mac.cc:500:0:
warning: ignoring #pragma clang diagnostic [-Wunknown-pragmas]
../../../../gcc-7-20161114/libsanitizer/sanitizer_common/sanitizer_mac.cc:500:0:
warning: ignoring #pragma clang diagnostic [-Wunknown-pragmas]
../../../../gcc-7-20161114/libsanitizer/sanitizer_common/sanitizer_mac.cc:503:0:
warning: ignoring #pragma clang diagnostic [-Wunknown-pragmas]
   os_trace("Thread Sanitizer reported a failure.");

../../../../gcc-7-20161114/libsanitizer/sanitizer_common/sanitizer_mac.cc:503:0:
warning: ignoring #pragma clang diagnostic [-Wunknown-pragmas]
../../../../gcc-7-20161114/libsanitizer/sanitizer_common/sanitizer_mac.cc:503:0:
warning: ignoring #pragma clang diagnostic [-Wunknown-pragmas]
../../../../gcc-7-20161114/libsanitizer/sanitizer_common/sanitizer_mac.cc:505:0:
warning: ignoring #pragma clang diagnostic [-Wunknown-pragmas]
   os_trace("Sanitizer tool reported a failure.");

../../../../gcc-7-20161114/libsanitizer/sanitizer_common/sanitizer_mac.cc:505:0:
warning: ignoring #pragma clang diagnostic [-Wunknown-pragmas]
../../../../gcc-7-20161114/libsanitizer/sanitizer_common/sanitizer_mac.cc:505:0:
warning: ignoring #pragma clang diagnostic [-Wunknown-pragmas]
../../../../gcc-7-20161114/libsanitizer/sanitizer_common/sanitizer_mac.cc:508:0:
warning: ignoring #pragma clang diagnostic [-Wunknown-pragmas]
   os_trace("Consult syslog for more information.");

../../../../gcc-7-20161114/libsanitizer/sanitizer_common/sanitizer_mac.cc:508:0:
warning: ignoring #pragma clang diagnostic [-Wunknown-pragmas]
../../../../gcc-7-20161114/libsanitizer/sanitizer_common/sanitizer_mac.cc:508:0:
warning: ignoring #pragma clang diagnostic [-Wunknown-pragmas]
In file included from
../../../../gcc-7-20161114/libsanitizer/sanitizer_common/sanitizer_mac.cc:39:0:
../../../../gcc-7-20161114/libsanitizer/sanitizer_common/sanitizer_mac.cc:
In function ‘void __sanitizer::LogFullErrorReport(const char*)’:
../../../../gcc-7-20161114/libsanitizer/sanitizer_common/sanitizer_mac.cc:497:7:
error: ‘_Static_assert’ was not declared in this scope
   os_trace("Address Sanitizer reported a failure.");
   ^
../../../../gcc-7-20161114/libsanitizer/sanitizer_common/sanitizer_mac.cc:497:7:
note: suggested alternative: ‘__cpp_static_assert’
../../../../gcc-7-20161114/libsanitizer/sanitizer_common/sanitizer_mac.cc:497:7:
error: ‘_os_trace_with_buffer’ was not declared in this scope
   os_trace("Address Sanitizer reported a failure.");
   ^
../../../../gcc-7-20161114/libsanitizer/sanitizer_common/sanit

Re: [PATCH, GCC/ARM] Make arm_feature_set agree with type of FL_* macros

2016-11-14 Thread Thomas Preudhomme
My apologies: I realized when trying to apply the patch that I wrote it on top
of the optional -mthumb patch instead of the reverse. I'll rebase it so as not
to break bisection.


Best regards,

Thomas

On 14/11/16 14:47, Kyrill Tkachov wrote:


On 14/11/16 14:07, Thomas Preudhomme wrote:

Hi,

Currently arm_feature_set is defined in gcc/config/arm/arm-flags as an array
of 2 unsigned long. However, the flags stored in these two entries are
(signed) int, being combinations of bits set via expression of the form 1 <<
bitno. This creates 3 issues:

1) undefined behavior when setting the msb (1 << 31)
2) undefined behavior when storing a flag with msb set (negative int) into one
of the unsigned array entries (positive int)
3) waste of space since the top 32 bits of each entry is not used

This patch changes the definition of FL_* macro to be unsigned int by using
the form 1U << bitno instead and changes the definition of arm_feature_set to
be an array of 2 unsigned (int) entries.

Bootstrapped on arm-linux-gnueabihf targeting Thumb-2 state.

Is this ok for trunk?



Ok.
Thanks,
Kyrill


Best regards,

Thomas




Re: [libstdc++, testsuite] Add dg-require-thread-fence

2016-11-14 Thread Christophe Lyon
On 14 November 2016 at 18:54, Mike Stump  wrote:
> On Oct 21, 2016, at 1:00 AM, Christophe Lyon  
> wrote:
>>
>> So if we say that the current behaviour has to keep being the default,
>> so that users think about what they are really doing,
>
> Having a toolchain not work by default to force users to think, isn't a 
> winning strategy.
>
> Everything should always, just work.  Those things that don't, we should fix.
>
I tend to agree :-)

Maybe Ramana changed his mind and would now no longer want to force
users to think?


Re: [PATCH 04/11] make recog () take a rtx_insn *

2016-11-14 Thread Richard Sandiford
Thanks for doing this.

tbsaunde+...@tbsaunde.org writes:
> diff --git a/gcc/genrecog.c b/gcc/genrecog.c
> index a8e8c22..aa7f629 100644
> --- a/gcc/genrecog.c
> +++ b/gcc/genrecog.c
> @@ -5102,8 +5102,7 @@ print_subroutine (output_state *os, state *s, int 
> proc_id)
>/* For now, the top-level "recog" takes a plain "rtx", and performs a
>   checked cast to "rtx_insn *" for use throughout the rest of the
>   function and the code it calls.  */
> -  const char *insn_param
> -= proc_id > 0 ? "rtx_insn *insn" : "rtx uncast_insn";
> +  const char *insn_param = "rtx_insn *insn";

The comment is no longer true after the patch.  We might as well just
get rid of the variable now that it's equal to a constant string of
almost the same length as the variable name.

Richard


Re: [PATCH] warn on overflow in calls to allocation functions (bugs 77531 and 78284)

2016-11-14 Thread Eric Gallager
On 11/13/16, Martin Sebor  wrote:
> Bug 77531 requests a new warning for calls to allocation functions
> (those declared with attribute alloc_size(X, Y)) that overflow the
> computation X * Z of the size of the allocated object.
>
> Bug 78284 suggests that detecting and diagnosing other common errors
> in calls to allocation functions, such as allocating more space than
> SIZE_MAX / 2 bytes, would help prevent subsequent buffer overflows.
>
> The attached patch adds two new warning options, -Walloc-zero and
> -Walloc-larger-than=bytes that implement these two enhancements.
> The patch is not 100% finished because, as it turns out, the GCC
> allocation built-ins (malloc et al.) do not make use of the
> attribute and so don't benefit from the warnings.  The tests are
> also incomplete, and there's at least one bug in the implementation
> I know about.
>
> I'm posting the patch while stage 1 is still open and to give
> a heads up on it and to get early feedback.  I expect completing
> it will be straightforward.
>
> Martin
>
> PS The alloc_max_size function added in the patch handles sizes
> specified using suffixes like KB, MB, etc.  I added that to make
> it possible to specify sizes in excess of the maximum of INT_MAX
> that (AFAIK) options that take integer arguments handle out of
> the box.  It only belatedly occurred to me that the suffixes
> are unnecessary if the option argument is handled using strtoull.
> I can remove the suffix (as I suspect it will raise objections)
> but I think that a general solution along these lines would be
> useful to let users specify large byte sizes in other options
> as well (such as -Walloca-larger-than, -Wvla-larger-than).  Are
> there any suggestions or preferences?
>


-Walloc-larger-than looks way too similar to -Walloca-larger-than; at
first I was confused as to why you were adding the same flag again
until I spotted the one letter difference. Maybe come up with a name
that looks more distinct? Just something to bikeshed about.


Re: Default associative containers constructors/destructor/assignment

2016-11-14 Thread François Dumont

Any feedback regarding this patch ?

François

On 02/11/2016 22:37, François Dumont wrote:

Hi

Here is an updated proposal: I realized that the newly introduced
_M_move_data can also be used in the swap implementation.


Let me know whether you prefer the patch with or without that change.

François


On 28/10/2016 21:42, François Dumont wrote:

Hi

Here is the patch to default all other associative containers 
operations that can be defaulted.


To do so I introduce a _Rb_tree_key_compare type that takes care
of value-initialization of the compare functor. It also makes sure that
the functor is copied rather than moved in the move constructor, with the
necessary noexcept qualification.


I also introduce _Rb_tree_header to take care of the
initialization of the _Rb_tree_node_base used in the container header
and of _M_node_count. I also use it to implement the move semantics
and so also default the _Rb_tree_impl move constructor.


I also propose a solution for the FIXME regarding documentation
of the container destructor: I used a C++11 default declaration. I don't
have the necessary tools to generate the Doxygen doc, but I am confident that
it should work fine. I had to simplify the doc for operations that are
now defaulted.



* include/bits/stl_map.h (map(const map&)): Make default.
(map(map&&)): Likewise.
(~map()): Likewise.
(operator=(const map&)): Likewise.
* include/bits/stl_multimap.h (multimap(const multimap&)): Make 
default.

(multimap(multimap&&)): Likewise.
(~multimap()): Likewise.
(operator=(const multimap&)): Likewise.
* include/bits/stl_set.h (set(const set&)): Make default.
(set(set&&)): Likewise.
(~set()): Likewise.
(operator=(const set&)): Likewise.
* include/bits/stl_multiset.h (multiset(const multiset&)): Make 
default.

(multiset(multiset&&)): Likewise.
(~multiset()): Likewise.
(operator=(const multiset&)): Likewise.
* include/bits/stl_tree.h (_Rb_tree_key_compare<>): New.
(_Rb_tree_header): New.
(_Rb_tree_impl): Inherit from latter.
(_Rb_tree_impl()): Make default.
(_Rb_tree_impl(const _Rb_tree_impl&)): New.
(_Rb_tree_impl(_Rb_tree_impl&&)): New, default.
(_Rb_tree_impl::_M_reset): Move...
(_Rb_tree_header::_M_reset): ...here.
(_Rb_tree_impl::_M_initialize): Move...
(_Rb_tree_header::_M_initialize): ...here.
(_Rb_tree(_Rb_tree&&)): Make default.
(_Rb_tree_header::_M_move_data(_Rb_tree_header&)): New.
(_Rb_tree<>::_M_move_data(_Rb_tree&, true_type)): Use latter.

Tested under Linux x86_64, ok to commit ?

François







  1   2   >