Re: [PATCH 09/12] [i386] Add patterns and predicates foutline-msabi-xlouges

2017-05-03 Thread Daniel Santos

On 05/03/2017 01:10 AM, Uros Bizjak wrote:

The order of subexpressions of parallel in general does not matter.


Thanks, this makes things much clearer.


Also, I'm wondering if there's anything wrong with calling ix86_gen_leave ()
and plucking the insns out of the generated parallel insn and moving that
into my own parallel rather than generating them in my own function.  I
guess all the matters is what is cleanest.

Hm... I'd rather see subexpressions generated "by hand".


OK.  While we're on the topic, are you OK with my changes to 
ix86_emit_leave to generate the notes or would you prefer those by hand 
as well?


Also, are these predicates what you had in mind?  (I haven't actually 
tested them just yet.)


(define_predicate "save_multiple"
  (match_code "parallel")
{
  const unsigned len = XVECLEN (op, 0);
  unsigned i;

  /* Starting from end of vector, count register saves.  */
  for (i = 0; i < len; ++i)
{
  rtx src, dest, addr;
  rtx e = XVECEXP (op, 0, len - 1 - i);

  if (GET_CODE (e) != SET)
break;

  src  = SET_SRC (e);
  dest = SET_DEST (e);

  if (!REG_P (src) || !MEM_P (dest))
break;

  addr = XEXP (dest, 0);

  /* Good if dest address is in RAX.  */
  if (REG_P (addr) && REGNO (addr) == AX_REG)
continue;

  /* Good if dest address is offset of RAX.  */
  if (GET_CODE (addr) == PLUS
  && REG_P (XEXP (addr, 0))
  && REGNO (XEXP (addr, 0)) == AX_REG)
continue;

  break;
}
  return (i >= 12 && i <= 18);
})


(define_predicate "restore_multiple"
  (match_code "parallel")
{
  const unsigned len = XVECLEN (op, 0);
  unsigned i;

  /* Starting from end of vector, count register restores.  */
  for (i = 0; i < len; ++i)
{
  rtx src, dest, addr;
  rtx e = XVECEXP (op, 0, len - 1 - i);

  if (GET_CODE (e) != SET)
break;

  src  = SET_SRC (e);
  dest = SET_DEST (e);

  if (!MEM_P (src) || !REG_P (dest))
break;

  addr = XEXP (src, 0);

  /* Good if src address is in RSI.  */
  if (REG_P (addr) && REGNO (addr) == SI_REG)
continue;

  /* Good if src address is offset of RSI.  */
  if (GET_CODE (addr) == PLUS
  && REG_P (XEXP (addr, 0))
  && REGNO (XEXP (addr, 0)) == SI_REG)
continue;

  break;
}
  return (i >= 12 && i <= 18);
})


Thanks,
Daniel



[testsuite, committed] Add quotes to numerical comment arg of dg directive

2017-05-03 Thread Tom de Vries

Hi,

this patch adds quotes to the comment argument of a dg-message directive 
when the comment is a plain number, to avoid confusion with line numbers.


Committed as obvious.

Thanks,
- Tom

Add quotes to numerical comment arg of dg directive

2017-05-01  Tom de Vries  

	* c-c++-common/goacc/data-default-1.c: Add quotes to numerical comment
	arg of dg directive.
	* c-c++-common/goacc/routine-3.c: Same.
	* c-c++-common/goacc/routine-4.c: Same.

---
 gcc/testsuite/c-c++-common/goacc/data-default-1.c | 4 ++--
 gcc/testsuite/c-c++-common/goacc/routine-3.c  | 4 ++--
 gcc/testsuite/c-c++-common/goacc/routine-4.c  | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/gcc/testsuite/c-c++-common/goacc/data-default-1.c b/gcc/testsuite/c-c++-common/goacc/data-default-1.c
index 631032e..ec53cbe 100644
--- a/gcc/testsuite/c-c++-common/goacc/data-default-1.c
+++ b/gcc/testsuite/c-c++-common/goacc/data-default-1.c
@@ -6,13 +6,13 @@ int main ()
   int n = 2;
   int ary[2];
   
-#pragma acc parallel default (none) /* { dg-message "'parallel' construct" 2 } */
+#pragma acc parallel default (none) /* { dg-message "'parallel' construct" "2" } */
   {
 ary[0] /* { dg-error "not specified in enclosing" } */
   = n; /* { dg-error "not specified in enclosing" } */
   }
 
-#pragma acc kernels default (none) /* { dg-message "'kernels' construct" 2 } */
+#pragma acc kernels default (none) /* { dg-message "'kernels' construct" "2" } */
   {
 ary[0] /* { dg-error "not specified in enclosing" } */
   = n; /* { dg-error "not specified in enclosing" } */
diff --git a/gcc/testsuite/c-c++-common/goacc/routine-3.c b/gcc/testsuite/c-c++-common/goacc/routine-3.c
index b322d26..eaea470 100644
--- a/gcc/testsuite/c-c++-common/goacc/routine-3.c
+++ b/gcc/testsuite/c-c++-common/goacc/routine-3.c
@@ -2,7 +2,7 @@
 
 #pragma acc routine gang
 int
-gang () /* { dg-message "declared here" 3 } */
+gang () /* { dg-message "declared here" "3" } */
 {
   #pragma acc loop gang worker vector
   for (int i = 0; i < 10; i++)
@@ -14,7 +14,7 @@ gang () /* { dg-message "declared here" 3 } */
 
 #pragma acc routine worker
 int
-worker () /* { dg-message "declared here" 2 } */
+worker () /* { dg-message "declared here" "2" } */
 {
   #pragma acc loop worker vector
   for (int i = 0; i < 10; i++)
diff --git a/gcc/testsuite/c-c++-common/goacc/routine-4.c b/gcc/testsuite/c-c++-common/goacc/routine-4.c
index 3e5fc4f..efc4a0b 100644
--- a/gcc/testsuite/c-c++-common/goacc/routine-4.c
+++ b/gcc/testsuite/c-c++-common/goacc/routine-4.c
@@ -35,7 +35,7 @@ void seq (void)
 red ++;
 }
 
-void vector (void) /* { dg-message "declared here" 1 } */
+void vector (void) /* { dg-message "declared here" "1" } */
 {
   gang ();  /* { dg-error "routine call uses" } */
   worker ();  /* { dg-error "routine call uses" } */
@@ -61,7 +61,7 @@ void vector (void) /* { dg-message "declared here" 1 } */
 red ++;
 }
 
-void worker (void) /* { dg-message "declared here" 2 } */
+void worker (void) /* { dg-message "declared here" "2" } */
 {
   gang ();  /* { dg-error "routine call uses" } */
   worker ();
@@ -87,7 +87,7 @@ void worker (void) /* { dg-message "declared here" 2 } */
 red ++;
 }
 
-void gang (void) /* { dg-message "declared here" 3 } */
+void gang (void) /* { dg-message "declared here" "3" } */
 {
   gang ();
   worker ();


[testsuite, committed] Replace absolute line numbers in c-c++-common

2017-05-03 Thread Tom de Vries

Hi,

this patch replaces absolute line numbers in the c-c++-common directory.

Committed as obvious.

Thanks,
- Tom
Replace absolute line numbers in c-c++-common

2017-05-01  Tom de Vries  

	PR testsuite/80557
	* c-c++-common/Wshift-negative-value-1.c: Replace absolute line numbers.
	* c-c++-common/Wshift-negative-value-2.c: Same.
	* c-c++-common/Wshift-negative-value-3.c: Same.
	* c-c++-common/Wshift-negative-value-4.c: Same.
	* c-c++-common/cilk-plus/AN/pr57541.c: Same.
	* c-c++-common/cpp/pr60400.c: Same.
	* c-c++-common/fmax-errors.c: Same.
	* c-c++-common/goacc/data-2.c: Same.
	* c-c++-common/goacc/host_data-2.c: Same.
	* c-c++-common/gomp/simd4.c: Same.
	* c-c++-common/pr28656.c: Same.
	* c-c++-common/pr43395.c: Same.
	* c-c++-common/torture/pr57945.c: Same.

---
 gcc/testsuite/c-c++-common/Wshift-negative-value-1.c |  3 +--
 gcc/testsuite/c-c++-common/Wshift-negative-value-2.c |  5 ++---
 gcc/testsuite/c-c++-common/Wshift-negative-value-3.c |  5 ++---
 gcc/testsuite/c-c++-common/Wshift-negative-value-4.c |  5 ++---
 gcc/testsuite/c-c++-common/cilk-plus/AN/pr57541.c|  2 +-
 gcc/testsuite/c-c++-common/cpp/pr60400.c |  8 
 gcc/testsuite/c-c++-common/fmax-errors.c |  2 +-
 gcc/testsuite/c-c++-common/goacc/data-2.c|  2 +-
 gcc/testsuite/c-c++-common/goacc/host_data-2.c   |  8 
 gcc/testsuite/c-c++-common/gomp/simd4.c  |  4 ++--
 gcc/testsuite/c-c++-common/pr28656.c | 14 +++---
 gcc/testsuite/c-c++-common/pr43395.c | 12 ++--
 gcc/testsuite/c-c++-common/torture/pr57945.c |  3 +--
 13 files changed, 34 insertions(+), 39 deletions(-)

diff --git a/gcc/testsuite/c-c++-common/Wshift-negative-value-1.c b/gcc/testsuite/c-c++-common/Wshift-negative-value-1.c
index 8f14034..7df1804 100644
--- a/gcc/testsuite/c-c++-common/Wshift-negative-value-1.c
+++ b/gcc/testsuite/c-c++-common/Wshift-negative-value-1.c
@@ -7,6 +7,7 @@ enum E {
   A = 0 << 1,
   B = 1 << 1,
   C = -1 << 1, /* { dg-warning "left shift of negative value|not an integer constant" } */
+  /* { dg-error "left operand of shift expression" "shift" { target c++ } .-1 } */
   D = 0 >> 1,
   E = 1 >> 1,
   F = -1 >> 1
@@ -47,5 +48,3 @@ right (int x)
   r += -1U >> x;
   return r;
 }
-
-/* { dg-error "left operand of shift expression" "shift" { target c++ } 9 } */
diff --git a/gcc/testsuite/c-c++-common/Wshift-negative-value-2.c b/gcc/testsuite/c-c++-common/Wshift-negative-value-2.c
index 55523a5..3a60ed7 100644
--- a/gcc/testsuite/c-c++-common/Wshift-negative-value-2.c
+++ b/gcc/testsuite/c-c++-common/Wshift-negative-value-2.c
@@ -7,6 +7,8 @@ enum E {
   A = 0 << 1,
   B = 1 << 1,
   C = -1 << 1, /* { dg-warning "left shift of negative value" } */
+  /* { dg-error "not an integer constant" "no constant" { target c++ } .-1 } */
+  /* { dg-error "left operand of shift expression" "shift" { target c++ } .-2 } */
   D = 0 >> 1,
   E = 1 >> 1,
   F = -1 >> 1
@@ -47,6 +49,3 @@ right (int x)
   r += -1U >> x;
   return r;
 }
-
-/* { dg-error "not an integer constant" "no constant" { target c++ } 9 } */
-/* { dg-error "left operand of shift expression" "shift" { target c++ } 9 } */
diff --git a/gcc/testsuite/c-c++-common/Wshift-negative-value-3.c b/gcc/testsuite/c-c++-common/Wshift-negative-value-3.c
index 1295b72..503ca61 100644
--- a/gcc/testsuite/c-c++-common/Wshift-negative-value-3.c
+++ b/gcc/testsuite/c-c++-common/Wshift-negative-value-3.c
@@ -7,6 +7,8 @@ enum E {
   A = 0 << 1,
   B = 1 << 1,
   C = -1 << 1,
+  /* { dg-error "not an integer constant" "no constant" { target c++ } .-1 } */
+  /* { dg-error "left operand of shift expression" "shift" { target c++ } .-2 } */
   D = 0 >> 1,
   E = 1 >> 1,
   F = -1 >> 1
@@ -47,6 +49,3 @@ right (int x)
   r += -1U >> x;
   return r;
 }
-
-/* { dg-error "not an integer constant" "no constant" { target c++ } 9 } */
-/* { dg-error "left operand of shift expression" "shift" { target c++ } 9 } */
diff --git a/gcc/testsuite/c-c++-common/Wshift-negative-value-4.c b/gcc/testsuite/c-c++-common/Wshift-negative-value-4.c
index 3088220..fa7cb4e 100644
--- a/gcc/testsuite/c-c++-common/Wshift-negative-value-4.c
+++ b/gcc/testsuite/c-c++-common/Wshift-negative-value-4.c
@@ -7,6 +7,8 @@ enum E {
   A = 0 << 1,
   B = 1 << 1,
   C = -1 << 1,
+  /* { dg-error "not an integer constant" "no constant" { target c++ } .-1 } */
+  /* { dg-error "left operand of shift expression" "shift" { target c++ } .-2 } */
   D = 0 >> 1,
   E = 1 >> 1,
   F = -1 >> 1
@@ -47,6 +49,3 @@ right (int x)
   r += -1U >> x;
   return r;
 }
-
-/* { dg-error "not an integer constant" "no constant" { target c++ } 9 } */
-/* { dg-error "left operand of shift expression" "shift" { target c++ } 9 } */
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/AN/pr57541.c b/gcc/testsuite/c-c++-common/cilk-plus/AN/pr57541.c
index f379e46..a956d0e 100755
--- a/gcc/testsuite/c-c++-common/cilk-plus/AN/pr57541.c
+++ b/gcc/testsuite/c-c++-common/cilk-p

[committed] Wrap tree-data-ref.h macro arguments

2017-05-03 Thread Richard Sandiford
Tested on aarch64-linux-gnu and x86_64-linux-gnu.  Installed as obvious.

Thanks,
Richard


gcc/
2016-05-03  Richard Sandiford  

* tree-data-ref.h (SUB_CONFLICTS_IN_A): Wrap SUB argument in brackets.
(SUB_CONFLICTS_IN_B, SUB_LAST_CONFLICT, SUB_DISTANCE): Likewise.
(DDR_A): Wrap DDR argument in brackets.
(DDR_B, DDR_AFFINE_P, DDR_ARE_DEPENDENT, DDR_SUBSCRIPTS): Likewise.
(DDR_LOOP_NEST, DDR_INNER_LOOP, DDR_SELF_REFERENCE): Likewise.
(DDR_REVERSED_P): Likewise.

Index: gcc/tree-data-ref.h
===
--- gcc/tree-data-ref.h 2017-02-23 19:54:15.0 +
+++ gcc/tree-data-ref.h 2017-05-03 08:48:11.977015306 +0100
@@ -209,10 +209,10 @@ struct subscript
 
 typedef struct subscript *subscript_p;
 
-#define SUB_CONFLICTS_IN_A(SUB) SUB->conflicting_iterations_in_a
-#define SUB_CONFLICTS_IN_B(SUB) SUB->conflicting_iterations_in_b
-#define SUB_LAST_CONFLICT(SUB) SUB->last_conflict
-#define SUB_DISTANCE(SUB) SUB->distance
+#define SUB_CONFLICTS_IN_A(SUB) (SUB)->conflicting_iterations_in_a
+#define SUB_CONFLICTS_IN_B(SUB) (SUB)->conflicting_iterations_in_b
+#define SUB_LAST_CONFLICT(SUB) (SUB)->last_conflict
+#define SUB_DISTANCE(SUB) (SUB)->distance
 
 /* A data_dependence_relation represents a relation between two
data_references A and B.  */
@@ -268,20 +268,20 @@ struct data_dependence_relation
 
 typedef struct data_dependence_relation *ddr_p;
 
-#define DDR_A(DDR) DDR->a
-#define DDR_B(DDR) DDR->b
-#define DDR_AFFINE_P(DDR) DDR->affine_p
-#define DDR_ARE_DEPENDENT(DDR) DDR->are_dependent
-#define DDR_SUBSCRIPTS(DDR) DDR->subscripts
+#define DDR_A(DDR) (DDR)->a
+#define DDR_B(DDR) (DDR)->b
+#define DDR_AFFINE_P(DDR) (DDR)->affine_p
+#define DDR_ARE_DEPENDENT(DDR) (DDR)->are_dependent
+#define DDR_SUBSCRIPTS(DDR) (DDR)->subscripts
 #define DDR_SUBSCRIPT(DDR, I) DDR_SUBSCRIPTS (DDR)[I]
 #define DDR_NUM_SUBSCRIPTS(DDR) DDR_SUBSCRIPTS (DDR).length ()
 
-#define DDR_LOOP_NEST(DDR) DDR->loop_nest
+#define DDR_LOOP_NEST(DDR) (DDR)->loop_nest
 /* The size of the direction/distance vectors: the number of loops in
the loop nest.  */
 #define DDR_NB_LOOPS(DDR) (DDR_LOOP_NEST (DDR).length ())
-#define DDR_INNER_LOOP(DDR) DDR->inner_loop
-#define DDR_SELF_REFERENCE(DDR) DDR->self_reference_p
+#define DDR_INNER_LOOP(DDR) (DDR)->inner_loop
+#define DDR_SELF_REFERENCE(DDR) (DDR)->self_reference_p
 
 #define DDR_DIST_VECTS(DDR) ((DDR)->dist_vects)
 #define DDR_DIR_VECTS(DDR) ((DDR)->dir_vects)
@@ -293,7 +293,7 @@ #define DDR_DIR_VECT(DDR, I) \
   DDR_DIR_VECTS (DDR)[I]
 #define DDR_DIST_VECT(DDR, I) \
   DDR_DIST_VECTS (DDR)[I]
-#define DDR_REVERSED_P(DDR) DDR->reversed_p
+#define DDR_REVERSED_P(DDR) (DDR)->reversed_p
 
 
 bool dr_analyze_innermost (struct data_reference *, struct loop *);


Alternative check for vector refs with same alignment

2017-05-03 Thread Richard Sandiford
vect_find_same_alignment_drs uses the ddr dependence distance
to tell whether two references have the same alignment.  Although
that's safe with the current code, there's no particular reason
why a dependence distance of 0 should mean that the accesses start
on the same byte.  E.g. a reference to a full complex value could
in principle depend on a reference to the imaginary component.
A later patch adds support for this kind of dependence.

On the other side, checking modulo vf is pessimistic when the step
divided by the element size is a factor of 2.

This patch instead looks for cases in which the drs have the same
base, offset and step, and for which the difference in their constant
initial values is a multiple of the alignment.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Thanks,
Richard


gcc/
2017-05-03  Richard Sandiford  

* tree-vect-data-refs.c (vect_find_same_alignment_drs): Remove
loop_vinfo argument and use of dependence distance vectors.
Check instead whether the two references differ only in their
initial value and assume that they have the same alignment if the
difference is a multiple of the vector alignment.
(vect_analyze_data_refs_alignment): Update call accordingly.

gcc/testsuite/
* gcc.dg/vect/vect-103.c: Update wording of dump message.

Index: gcc/tree-vect-data-refs.c
===
--- gcc/tree-vect-data-refs.c   2017-04-18 19:52:35.060164268 +0100
+++ gcc/tree-vect-data-refs.c   2017-05-03 08:48:30.536704993 +0100
@@ -2042,20 +2042,12 @@ vect_enhance_data_refs_alignment (loop_v
vectorization factor.  */
 
 static void
-vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
- loop_vec_info loop_vinfo)
+vect_find_same_alignment_drs (struct data_dependence_relation *ddr)
 {
-  unsigned int i;
-  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   struct data_reference *dra = DDR_A (ddr);
   struct data_reference *drb = DDR_B (ddr);
   stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
   stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
-  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra;
-  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb;
-  lambda_vector dist_v;
-  unsigned int loop_depth;
 
   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
 return;
@@ -2063,48 +2055,37 @@ vect_find_same_alignment_drs (struct dat
   if (dra == drb)
 return;
 
-  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
-return;
-
-  /* Loop-based vectorization and known data dependence.  */
-  if (DDR_NUM_DIST_VECTS (ddr) == 0)
-return;
-
-  /* Data-dependence analysis reports a distance vector of zero
- for data-references that overlap only in the first iteration
- but have different sign step (see PR45764).
- So as a sanity check require equal DR_STEP.  */
-  if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
+  if (!operand_equal_p (DR_BASE_OBJECT (dra), DR_BASE_OBJECT (drb),
+   OEP_ADDRESS_OF)
+  || !operand_equal_p (DR_OFFSET (dra), DR_OFFSET (drb), 0)
+  || !operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
 return;
 
-  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
-  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
+  /* Two references with distance zero have the same alignment.  */
+  offset_int diff = (wi::to_offset (DR_INIT (dra))
+- wi::to_offset (DR_INIT (drb)));
+  if (diff != 0)
 {
-  int dist = dist_v[loop_depth];
+  /* Get the wider of the two alignments.  */
+  unsigned int align_a = TYPE_ALIGN_UNIT (STMT_VINFO_VECTYPE (stmtinfo_a));
+  unsigned int align_b = TYPE_ALIGN_UNIT (STMT_VINFO_VECTYPE (stmtinfo_b));
+  unsigned int max_align = MAX (align_a, align_b);
+
+  /* Require the gap to be a multiple of the larger vector alignment.  */
+  if (!wi::multiple_of_p (diff, max_align, SIGNED))
+   return;
+}
 
-  if (dump_enabled_p ())
-   dump_printf_loc (MSG_NOTE, vect_location,
-"dependence distance  = %d.\n", dist);
-
-  /* Same loop iteration.  */
-  if (dist == 0
- || (dist % vectorization_factor == 0 && dra_size == drb_size))
-   {
- /* Two references with distance zero have the same alignment.  */
- STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a).safe_push (drb);
- STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b).safe_push (dra);
- if (dump_enabled_p ())
-   {
- dump_printf_loc (MSG_NOTE, vect_location,
-  "accesses have the same alignment.\n");
- dump_printf (MSG_NOTE,
-  "dependence distance modulo vf == 0 between ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
- d

Handle data dependence relations with different bases

2017-05-03 Thread Richard Sandiford
This patch tries to calculate conservatively-correct distance
vectors for two references whose base addresses are not the same.
It sets a new flag DDR_COULD_BE_INDEPENDENT_P if the dependence
isn't guaranteed to occur.

The motivating example is:

  struct s { int x[8]; };
  void
  f (struct s *a, struct s *b)
  {
for (int i = 0; i < 8; ++i)
  a->x[i] += b->x[i];
  }

in which the "a" and "b" accesses are either independent or have a
dependence distance of 0 (assuming -fstrict-aliasing).  Neither case
prevents vectorisation, so we can vectorise without an alias check.

I'd originally wanted to do the same thing for arrays as well, e.g.:

  void
  f (int a[][8], struct b[][8])
  {
for (int i = 0; i < 8; ++i)
  a[0][i] += b[0][i];
  }

I think this is valid because C11 6.7.6.2/6 says:

  For two array types to be compatible, both shall have compatible
  element types, and if both size specifiers are present, and are
  integer constant expressions, then both size specifiers shall have
  the same constant value.

So if we access an array through an int (*)[8], it must have type X[8]
or X[], where X is compatible with int.  It doesn't seem possible in
either case for "a[0]" and "b[0]" to overlap when "a != b".

However, Richard B said that (at least in gimple) we support arbitrary
overlap of arrays and allow arrays to be accessed with different
dimensionality.  There are examples of this in PR50067.  I've therefore
only handled references that end in a structure field access.

There are two ways of handling these dependences in the vectoriser:
use them to limit VF, or check at runtime as before.  I've gone for
the approach of checking at runtime if we can, to avoid limiting VF
unnecessarily.  We still fall back to a VF cap when runtime checks
aren't allowed.

The patch tests whether we queued an alias check with a dependence
distance of X and then picked a VF <= X, in which case it's safe to
drop the alias check.  Since vect_prune_runtime_alias_check_list can
be called twice with different VF for the same loop, it's no longer
safe to clear may_alias_ddrs on exit.  Instead we should use
comp_alias_ddrs to check whether versioning is necessary.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Thanks,
Richard


gcc/
2017-05-03  Richard Sandiford  

* tree-data-ref.h (subscript): Add access_fn field.
(data_dependence_relation): Add could_be_independent_p.
(SUB_ACCESS_FN, DDR_COULD_BE_INDEPENDENT_P): New macros.
(same_access_functions): Move to tree-data-ref.c.
* tree-data-ref.c (ref_contains_union_access_p): New function.
(dump_data_dependence_relation): Use SUB_ACCESS_FN instead of
DR_ACCESS_FN.
(constant_access_functions): Likewise.
(add_other_self_distances): Likewise.
(same_access_functions): Likewise.  (Moved from tree-data-ref.h.)
(initialize_data_dependence_relation): Use XCNEW and remove
explicit zeroing of DDR_REVERSED_P.  Look for a subsequence
of access functions that have the same type.  Allow the
subsequence to end with different bases in some circumstances.
Record the chosen access functions in SUB_ACCESS_FN.
(build_classic_dist_vector_1): Replace ddr_a and ddr_b with
a_index and b_index.  Use SUB_ACCESS_FN instead of DR_ACCESS_FN.
(subscript_dependence_tester_1): Likewise dra and drb.
(build_classic_dist_vector): Update calls accordingly.
(subscript_dependence_tester): Likewise.
* tree-ssa-loop-prefetch.c (determine_loop_nest_reuse): Check
DDR_COULD_BE_INDEPENDENT_P.
* tree-vectorizer.h (LOOP_REQUIRES_VERSIONING_FOR_ALIAS): Test
comp_alias_ddrs instead of may_alias_ddrs.
* tree-vect-data-refs.c (vect_analyze_data_ref_dependence): Try
to mark for aliasing if DDR_COULD_BE_INDEPENDENT_P, but fall back
to using the recorded distance vectors if that fails.
(dependence_distance_ge_vf): New function.
(vect_prune_runtime_alias_test_list): Use it.  Don't clear
LOOP_VINFO_MAY_ALIAS_DDRS.

gcc/testsuite/
* gcc.dg/vect/vect-alias-check-3.c: New test.
* gcc.dg/vect/vect-alias-check-4.c: Likewise.
* gcc.dg/vect/vect-alias-check-5.c: Likewise.

Index: gcc/tree-data-ref.h
===
--- gcc/tree-data-ref.h 2017-05-03 08:48:11.977015306 +0100
+++ gcc/tree-data-ref.h 2017-05-03 08:48:48.737038502 +0100
@@ -191,6 +191,9 @@ struct conflict_function
 
 struct subscript
 {
+  /* The access functions of the two references.  */
+  tree access_fn[2];
+
   /* A description of the iterations for which the elements are
  accessed twice.  */
   conflict_function *conflicting_iterations_in_a;
@@ -209,6 +212,7 @@ struct subscript
 
 typedef struct subscript *subscript_p;
 
+#define SUB_ACCESS_FN(SUB, I) (SUB)->access_fn[I]
 #define SUB_CONFLICTS_IN_A(SUB) (SUB)->conflicting_ite

Use base inequality for some vector alias checks

2017-05-03 Thread Richard Sandiford
This patch checks whether two data references x and y cannot
partially overlap and so are independent whenever &x != &y.
We can then use this in the vectoriser to optimise alias checks.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Thanks,
Richard


gcc/
2016-05-03  Richard Sandiford  

* hash-traits.h (pair_hash): New struct.
* tree-data-ref.h (data_dependence_relation): Add object_a and
object_b fields.
(DDR_OBJECT_A, DDR_OBJECT_B): New macros.
* tree-data-ref.c (initialize_data_dependence_relation): Initialize
DDR_OBJECT_A and DDR_OBJECT_B.
* tree-vectorizer.h (vec_object_pair): New type.
(_loop_vec_info): Add a check_unequal_addrs field.
(LOOP_VINFO_CHECK_UNEQUAL_ADDRS): New macro.
(LOOP_REQUIRES_VERSIONING_FOR_ALIAS): Return true if there is an
entry in check_unequal_addrs.  Check comp_alias_ddrs instead of
may_alias_ddrs.
* tree-vect-loop.c (destroy_loop_vec_info): Release
LOOP_VINFO_CHECK_UNEQUAL_ADDRS.
(vect_analyze_loop_2): Likewise, when restarting.
(vect_estimate_min_profitable_iters): Estimate the cost of
LOOP_VINFO_CHECK_UNEQUAL_ADDRS.
* tree-vect-data-refs.c: Include tree-hash-traits.h.
(vect_prune_runtime_alias_test_list): Try to handle conflicts
using LOOP_VINFO_CHECK_UNEQUAL_ADDRS, if the data dependence allows.
Count such tests in the final summary.
* tree-vect-loop-manip.c (chain_cond_expr): New function.
(vect_create_cond_for_align_checks): Use it.
(vect_create_cond_for_alias_checks): Likewise.
(vect_create_cond_for_unequal_addrs): New function.
(vect_loop_versioning): Call it.

gcc/testsuite/
* gcc.dg/vect/vect-alias-check-6.c: New test.

Index: gcc/hash-traits.h
===
--- gcc/hash-traits.h   2017-02-23 19:54:15.0 +
+++ gcc/hash-traits.h   2017-05-03 08:48:53.312035228 +0100
@@ -301,6 +301,76 @@ struct ggc_cache_ptr_hash : pointer_hash
 
 struct nofree_string_hash : string_hash, typed_noop_remove  {};
 
+/* Traits for pairs of values, using the first to record empty and
+   deleted slots.  */
+
+template 
+struct pair_hash
+{
+  typedef std::pair  value_type;
+  typedef std::pair  compare_type;
+
+  static inline hashval_t hash (const value_type &);
+  static inline bool equal (const value_type &, const compare_type &);
+  static inline void remove (value_type &);
+  static inline void mark_deleted (value_type &);
+  static inline void mark_empty (value_type &);
+  static inline bool is_deleted (const value_type &);
+  static inline bool is_empty (const value_type &);
+};
+
+template 
+inline hashval_t
+pair_hash ::hash (const value_type &x)
+{
+  return iterative_hash_hashval_t (T1::hash (x.first), T2::hash (x.second));
+}
+
+template 
+inline bool
+pair_hash ::equal (const value_type &x, const compare_type &y)
+{
+  return T1::equal (x.first, y.first) && T2::equal (x.second, y.second);
+}
+
+template 
+inline void
+pair_hash ::remove (value_type &x)
+{
+  T1::remove (x.first);
+  T2::remove (x.second);
+}
+
+template 
+inline void
+pair_hash ::mark_deleted (value_type &x)
+{
+  T1::mark_deleted (x.first);
+}
+
+template 
+inline void
+pair_hash ::mark_empty (value_type &x)
+{
+  T1::mark_empty (x.first);
+}
+
+template 
+inline bool
+pair_hash ::is_deleted (const value_type &x)
+{
+  return T1::is_deleted (x.first);
+}
+
+template 
+inline bool
+pair_hash ::is_empty (const value_type &x)
+{
+  return T1::is_empty (x.first);
+}
+
 template  struct default_hash_traits : T {};
 
 template 
Index: gcc/tree-data-ref.h
===
--- gcc/tree-data-ref.h 2017-05-03 08:48:48.737038502 +0100
+++ gcc/tree-data-ref.h 2017-05-03 08:48:53.313041828 +0100
@@ -240,6 +240,13 @@ struct data_dependence_relation
but the analyzer cannot be more specific.  */
   tree are_dependent;
 
+  /* If nonnull, COULD_BE_INDEPENDENT_P is true and the accesses are
+ independent when the runtime addresses of OBJECT_A and OBJECT_B
+ are different.  The addresses of both objects are invariant in the
+ loop nest.  */
+  tree object_a;
+  tree object_b;
+
   /* For each subscript in the dependence test, there is an element in
  this array.  This is the attribute that labels the edge A->B of
  the data_dependence_relation.  */
@@ -303,6 +310,8 @@ #define DDR_A(DDR) (DDR)->a
 #define DDR_B(DDR) (DDR)->b
 #define DDR_AFFINE_P(DDR) (DDR)->affine_p
 #define DDR_ARE_DEPENDENT(DDR) (DDR)->are_dependent
+#define DDR_OBJECT_A(DDR) (DDR)->object_a
+#define DDR_OBJECT_B(DDR) (DDR)->object_b
 #define DDR_SUBSCRIPTS(DDR) (DDR)->subscripts
 #define DDR_SUBSCRIPT(DDR, I) DDR_SUBSCRIPTS (DDR)[I]
 #define DDR_NUM_SUBSCRIPTS(DDR) DDR_SUBSCRIPTS (DDR).length ()
Index: gcc/tree-data-ref.c
===

Re: [PATCH] Optimize in VRP loads from constant arrays (take 2)

2017-05-03 Thread Jakub Jelinek
On Tue, May 02, 2017 at 02:50:16PM +0200, Richard Biener wrote:
> > If array_at_struct_end_p is wrong, it should be fixed ;)
> 
> Indeed.  It was originally meant to say false if you can trust
> TYPE_DOMAIN of the array but now it says false if there's some means
> to constrain the array size (the DECL_P path and now your STRING_CST
> one).  But all callers afterwards just look at TYPE_DOMAIN ...

So shall we verify that TYPE_DOMAIN is consistent with the object size
in that case inside of array_at_struct_end_p?

> > > I'd restructure the patch quite different, using for_each_index on the
> > > ref gather an array of index pointers (bail out on sth unhandled).
> > > Then I'd see if I have interesting ranges for them, if not, bail out.
> > > Also compute the size product of all ranges and test that against
> > > PARAM_MAX_VRP_CONSTANT_ARRAY_LOADS.  Then store the minimum range
> > > value in the index places (temporarily) and use get_base_ref_and_extent to
> > > get at the constant "starting" offset.  From there iterate using
> > > the remembered indices (remember the ref tree as well via for_each_index),
> > > directly adjusting the constant offset so you can feed
> > > fold_ctor_reference the constant offsets of all elements that need to
> > > be considered.  As optimization fold_ctor_reference would know how
> > > to start from the "last" offset (much refactoring would need to be
> > > done here given nested ctors and multiple indices I guess).
> > 
> > But for this, don't you want to take it over?
> 
> I can try.  Is there a PR for this?

Ok, filed PR80603, it is now all yours.

> > I agree that the current implementation is not very efficient and that is
> > why it is also limited to that small number of iterations.
> > As many cases just aren't able to use the valueize callback, handling
> > arbitrary numbers of non-constant indexes would be harder.
> 
> Sure.  I'd have expected you simply handle ARRAY_REF of a VAR_DECL
> and nothing else ;)

That would be too simple and boring ;)

Jakub


[RFC][PATCH] Introduce -fdump*-folding

2017-05-03 Thread Martin Liška
Hello

Last release cycle I spent quite some time with reading of IVOPTS pass
dump file. Using -fdump*-details causes to generate a lot of 'Applying pattern'
lines, which can make reading of a dump file more complicated.

There are stats for tramp3d with -O2 and -fdump-tree-all-details. Percentage 
number
shows how many lines are of the aforementioned pattern:

tramp3d-v4.cpp.164t.ivopts: 6.34%
  tramp3d-v4.cpp.091t.ccp2: 5.04%
  tramp3d-v4.cpp.093t.cunrolli: 4.41%
  tramp3d-v4.cpp.129t.laddress: 3.70%
  tramp3d-v4.cpp.032t.ccp1: 2.31%
  tramp3d-v4.cpp.038t.evrp: 1.90%
 tramp3d-v4.cpp.033t.forwprop1: 1.74%
  tramp3d-v4.cpp.103t.vrp1: 1.52%
 tramp3d-v4.cpp.124t.forwprop3: 1.31%
  tramp3d-v4.cpp.181t.vrp2: 1.30%
   tramp3d-v4.cpp.161t.cunroll: 1.22%
tramp3d-v4.cpp.027t.fixup_cfg3: 1.11%
   tramp3d-v4.cpp.153t.ivcanon: 1.07%
  tramp3d-v4.cpp.126t.ccp3: 0.96%
  tramp3d-v4.cpp.143t.sccp: 0.91%
 tramp3d-v4.cpp.185t.forwprop4: 0.82%
   tramp3d-v4.cpp.011t.cfg: 0.74%
 tramp3d-v4.cpp.096t.forwprop2: 0.50%
tramp3d-v4.cpp.019t.fixup_cfg1: 0.37%
 tramp3d-v4.cpp.120t.phicprop1: 0.33%
   tramp3d-v4.cpp.133t.pre: 0.32%
 tramp3d-v4.cpp.182t.phicprop2: 0.27%
tramp3d-v4.cpp.170t.veclower21: 0.25%
   tramp3d-v4.cpp.029t.einline: 0.24%

I'm suggesting to add new TDF that will be allocated for that.
Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.

Thoughts?
Martin
>From c1b832212576fd9f89fd738ae0cc98e9fb189c1d Mon Sep 17 00:00:00 2001
From: marxin 
Date: Wed, 1 Feb 2017 15:34:52 +0100
Subject: [PATCH] Introduce -fdump*-folding

gcc/ChangeLog:

2017-05-03  Martin Liska  

	* dumpfile.c: Add TDF_FOLDING.
	* dumpfile.h (enum tree_dump_index): Add to the enum.
	* genmatch.c (dt_simplify::gen_1): Use the newly added enum
	value.
---
 gcc/dumpfile.c | 1 +
 gcc/dumpfile.h | 7 ---
 gcc/genmatch.c | 2 +-
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/gcc/dumpfile.c b/gcc/dumpfile.c
index 6b9a47c5a26..b9c881c103f 100644
--- a/gcc/dumpfile.c
+++ b/gcc/dumpfile.c
@@ -111,6 +111,7 @@ static const struct dump_option_value_info dump_options[] =
   {"enumerate_locals", TDF_ENUMERATE_LOCALS},
   {"scev", TDF_SCEV},
   {"gimple", TDF_GIMPLE},
+  {"folding", TDF_FOLDING},
   {"optimized", MSG_OPTIMIZED_LOCATIONS},
   {"missed", MSG_MISSED_OPTIMIZATION},
   {"note", MSG_NOTE},
diff --git a/gcc/dumpfile.h b/gcc/dumpfile.h
index fef58f5e9b1..69c4ec0f861 100644
--- a/gcc/dumpfile.h
+++ b/gcc/dumpfile.h
@@ -84,9 +84,10 @@ enum tree_dump_index
 #define TDF_SCEV	(1 << 24)	/* Dump SCEV details.  */
 #define TDF_COMMENT	(1 << 25)	/* Dump lines with prefix ";;"  */
 #define TDF_GIMPLE	(1 << 26)	/* Dump in GIMPLE FE syntax  */
-#define MSG_OPTIMIZED_LOCATIONS  (1 << 27)  /* -fopt-info optimized sources */
-#define MSG_MISSED_OPTIMIZATION  (1 << 28)  /* missed opportunities */
-#define MSG_NOTE (1 << 29)  /* general optimization info */
+#define TDF_FOLDING	(1 << 27)	/* Dump folding details.  */
+#define MSG_OPTIMIZED_LOCATIONS  (1 << 28)  /* -fopt-info optimized sources */
+#define MSG_MISSED_OPTIMIZATION  (1 << 29)  /* missed opportunities */
+#define MSG_NOTE (1 << 30)  /* general optimization info */
 #define MSG_ALL (MSG_OPTIMIZED_LOCATIONS | MSG_MISSED_OPTIMIZATION \
  | MSG_NOTE)
 
diff --git a/gcc/genmatch.c b/gcc/genmatch.c
index 5621aa05b59..979d6856084 100644
--- a/gcc/genmatch.c
+++ b/gcc/genmatch.c
@@ -3187,7 +3187,7 @@ dt_simplify::gen_1 (FILE *f, int indent, bool gimple, operand *result)
 	}
 }
 
-  fprintf_indent (f, indent, "if (dump_file && (dump_flags & TDF_DETAILS)) "
+  fprintf_indent (f, indent, "if (dump_file && (dump_flags & TDF_FOLDING)) "
 	   "fprintf (dump_file, \"Applying pattern ");
   output_line_directive (f,
 			 result ? result->location : s->match->location, true);
-- 
2.12.2



Re: [RFC][PATCH] Introduce -fdump*-folding

2017-05-03 Thread Andrew Pinski
On Wed, May 3, 2017 at 1:10 AM, Martin Liška  wrote:
> Hello
>
> Last release cycle I spent quite some time with reading of IVOPTS pass
> dump file. Using -fdump*-details causes to generate a lot of 'Applying 
> pattern'
> lines, which can make reading of a dump file more complicated.

Yes the folding message can get annoying.  Especially when it is not
clear what is being folded which is the case a lot of the time I have
seen the message.

Thanks,
Andrew

>
> There are stats for tramp3d with -O2 and -fdump-tree-all-details. Percentage 
> number
> shows how many lines are of the aforementioned pattern:
>
> tramp3d-v4.cpp.164t.ivopts: 6.34%
>   tramp3d-v4.cpp.091t.ccp2: 5.04%
>   tramp3d-v4.cpp.093t.cunrolli: 4.41%
>   tramp3d-v4.cpp.129t.laddress: 3.70%
>   tramp3d-v4.cpp.032t.ccp1: 2.31%
>   tramp3d-v4.cpp.038t.evrp: 1.90%
>  tramp3d-v4.cpp.033t.forwprop1: 1.74%
>   tramp3d-v4.cpp.103t.vrp1: 1.52%
>  tramp3d-v4.cpp.124t.forwprop3: 1.31%
>   tramp3d-v4.cpp.181t.vrp2: 1.30%
>tramp3d-v4.cpp.161t.cunroll: 1.22%
> tramp3d-v4.cpp.027t.fixup_cfg3: 1.11%
>tramp3d-v4.cpp.153t.ivcanon: 1.07%
>   tramp3d-v4.cpp.126t.ccp3: 0.96%
>   tramp3d-v4.cpp.143t.sccp: 0.91%
>  tramp3d-v4.cpp.185t.forwprop4: 0.82%
>tramp3d-v4.cpp.011t.cfg: 0.74%
>  tramp3d-v4.cpp.096t.forwprop2: 0.50%
> tramp3d-v4.cpp.019t.fixup_cfg1: 0.37%
>  tramp3d-v4.cpp.120t.phicprop1: 0.33%
>tramp3d-v4.cpp.133t.pre: 0.32%
>  tramp3d-v4.cpp.182t.phicprop2: 0.27%
> tramp3d-v4.cpp.170t.veclower21: 0.25%
>tramp3d-v4.cpp.029t.einline: 0.24%
>
> I'm suggesting to add new TDF that will be allocated for that.
> Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
>
> Thoughts?
> Martin


[PATCH] Improve vectorizer peeling for alignment costmodel

2017-05-03 Thread Richard Biener

The following extends the very simplistic cost modeling I added somewhen
late in the release process to, for all unknown misaligned refs, also
apply this model for loops containing stores.

The model basically says it's useless to peel for alignment if there's
only a single DR that is affected or if, in case we'll end up using
hw-supported misaligned loads, the cost of misaligned loads is the same
as of aligned ones.  Previously we'd usually align one of the stores
with the theory that this improves (precious) store-bandwith.

Note this is only a so slightly conservative (aka less peeling).  We'll
still apply peeling for alignment if you make the testcase use +=
because then we'll align both the load and the store from v1.

Bootstrap / regtest running on x86_64-unknown-linux-gnu.

Richard.

2017-05-03  Richard Biener  

* tree-vect-data-refs.c (vect_enhance_data_refs_alignment):
When all DRs have unknown misaligned do not always peel
when there is a store but apply the same costing model as if
there were only loads.

* gcc.dg/vect/costmodel/x86_64/costmodel-alignpeel.c: New testcase.

Index: gcc/tree-vect-data-refs.c
===
--- gcc/tree-vect-data-refs.c   (revision 247498)
+++ gcc/tree-vect-data-refs.c   (working copy)
@@ -1715,18 +1741,18 @@ vect_enhance_data_refs_alignment (loop_v
 dr0 = first_store;
 }
 
-  /* In case there are only loads with different unknown misalignments, use
- peeling only if it may help to align other accesses in the loop or
+  /* Use peeling only if it may help to align other accesses in the loop or
 if it may help improving load bandwith when we'd end up using
 unaligned loads.  */
   tree dr0_vt = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr0)));
-  if (!first_store
- && !STMT_VINFO_SAME_ALIGN_REFS (
- vinfo_for_stmt (DR_STMT (dr0))).length ()
+  if (STMT_VINFO_SAME_ALIGN_REFS
+   (vinfo_for_stmt (DR_STMT (dr0))).length () == 0
  && (vect_supportable_dr_alignment (dr0, false)
  != dr_unaligned_supported
- || (builtin_vectorization_cost (vector_load, dr0_vt, 0)
- == builtin_vectorization_cost (unaligned_load, dr0_vt, -1
+ || (DR_IS_READ (dr0)
+ && (builtin_vectorization_cost (vector_load, dr0_vt, 0)
+ == builtin_vectorization_cost (unaligned_load,
+dr0_vt, -1)
 do_peeling = false;
 }
 

Index: gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-alignpeel.c
===
--- gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-alignpeel.c
(nonexistent)
+++ gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-alignpeel.c
(working copy)
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+
+void func(double * __restrict__ v1, double * v2, unsigned n)
+{
+  for (unsigned i = 0; i < n; ++i)
+v1[i] = v2[i];
+}
+
+/* { dg-final { scan-tree-dump-not "Alignment of access forced using peeling" 
"vect" } } */


Re: [PATCH 09/12] [i386] Add patterns and predicates foutline-msabi-xlouges

2017-05-03 Thread Uros Bizjak
On Wed, May 3, 2017 at 9:38 AM, Daniel Santos  wrote:
> On 05/03/2017 01:10 AM, Uros Bizjak wrote:
>>
>> The order of subexpressions of parallel in general does not matter.
>
>
> Thanks, this makes things much clearer.
>
>>> Also, I'm wondering if there's anything wrong with calling ix86_gen_leave
>>> ()
>>> and plucking the insns out of the generated parallel insn and moving that
>>> into my own parallel rather than generating them in my own function.  I
>>> guess all the matters is what is cleanest.
>>
>> Hm... I'd rather see subexpressions generated "by hand".
>
>
> OK.  While we're on the topic, are you OK with my changes to ix86_emit_leave
> to generate the notes or would you prefer those by hand as well?

I think they are OK. We are effectively emitting a leave here.

> Also, are these predicates what you had in mind?  (I haven't actually tested
> them just yet.)

Yes, these look good to me.

Uros.

> (define_predicate "save_multiple"
>   (match_code "parallel")
> {
>   const unsigned len = XVECLEN (op, 0);
>   unsigned i;
>
>   /* Starting from end of vector, count register saves.  */
>   for (i = 0; i < len; ++i)
> {
>   rtx src, dest, addr;
>   rtx e = XVECEXP (op, 0, len - 1 - i);
>
>   if (GET_CODE (e) != SET)
> break;
>
>   src  = SET_SRC (e);
>   dest = SET_DEST (e);
>
>   if (!REG_P (src) || !MEM_P (dest))
> break;
>
>   addr = XEXP (dest, 0);
>
>   /* Good if dest address is in RAX.  */
>   if (REG_P (addr) && REGNO (addr) == AX_REG)
> continue;
>
>   /* Good if dest address is offset of RAX.  */
>   if (GET_CODE (addr) == PLUS
>   && REG_P (XEXP (addr, 0))
>   && REGNO (XEXP (addr, 0)) == AX_REG)
> continue;
>
>   break;
> }
>   return (i >= 12 && i <= 18);
> })
>
>
> (define_predicate "restore_multiple"
>   (match_code "parallel")
> {
>   const unsigned len = XVECLEN (op, 0);
>   unsigned i;
>
>   /* Starting from end of vector, count register restores.  */
>   for (i = 0; i < len; ++i)
> {
>   rtx src, dest, addr;
>   rtx e = XVECEXP (op, 0, len - 1 - i);
>
>   if (GET_CODE (e) != SET)
> break;
>
>   src  = SET_SRC (e);
>   dest = SET_DEST (e);
>
>   if (!MEM_P (src) || !REG_P (dest))
> break;
>
>   addr = XEXP (src, 0);
>
>   /* Good if src address is in RSI.  */
>   if (REG_P (addr) && REGNO (addr) == SI_REG)
> continue;
>
>   /* Good if src address is offset of RSI.  */
>   if (GET_CODE (addr) == PLUS
>   && REG_P (XEXP (addr, 0))
>   && REGNO (XEXP (addr, 0)) == SI_REG)
> continue;
>
>   break;
> }
>   return (i >= 12 && i <= 18);
> })
>
>
> Thanks,
> Daniel
>


Re: [PATCH GCC8][01/33]Handle TRUNCATE between tieable modes in rtx_cost

2017-05-03 Thread Bin.Cheng
On Wed, May 3, 2017 at 7:17 AM, Eric Botcazou  wrote:
>> 2017-04-11  Bin Cheng  
>>
>>   * rtlanal.c (rtx_cost): Handle TRUNCATE between tieable modes.
>
> This breaks bootstrap with RTL checking:
>
> /home/eric/build/gcc/native/./gcc/xgcc -B/home/eric/build/gcc/native/./gcc/ -
> nostdinc -x c /dev/null -S -o /dev/null -fself-
> test=/home/eric/svn/gcc/gcc/testsuite/selftests
> cc1: internal compiler error: RTL check: expected code 'subreg', have
> 'truncate' in rtx_cost, at rtlanal.c:4169
> 0xbae338 rtl_check_failed_code1(rtx_def const*, rtx_code, char const*, int,
> char const*)
> /home/eric/svn/gcc/gcc/rtl.c:829
> 0xbbc9b4 rtx_cost(rtx_def*, machine_mode, rtx_code, int, bool)
> /home/eric/svn/gcc/gcc/rtlanal.c:4169
> 0x8517e6 set_src_cost
> /home/eric/svn/gcc/gcc/rtl.h:2685
> 0x8517e6 init_expmed_one_conv
> /home/eric/svn/gcc/gcc/expmed.c:142
> 0x8517e6 init_expmed_one_mode
> /home/eric/svn/gcc/gcc/expmed.c:209
> 0x853fb2 init_expmed()
> /home/eric/svn/gcc/gcc/expmed.c:270
> 0xc45974 backend_init_target
> /home/eric/svn/gcc/gcc/toplev.c:1665
> 0xc45974 initialize_rtl()
>
Sorry for disturbing, I will revert this if can't fix today.

Thanks,
bin
> --
> Eric Botcazou


Re: [PATCH] Allow building GCC with PTX offloading even without CUDA being installed (gcc and nvptx-tools patches)

2017-05-03 Thread Jakub Jelinek
On Sat, Jan 21, 2017 at 03:50:43PM +0100, Thomas Schwinge wrote:
> > In order to configure gcc to load libcuda.so.1 dynamically,
> > one has to either configure it --without-cuda-driver, or without
> > --with-cuda-driver=/--with-cuda-driver-lib=/--with-cuda-driver-include=
> > options if cuda.h and -lcuda aren't found in the default locations.
> 
> Would be good to have that documented ;-) -- done.
> 
> > The nvptx-tools change
> 
> (I'll get to that later.)

I'd like to ping the nvptx-tools change.  Shall I make a github pull request
for that?

I have additional following two further patches, the first one just to shut
up -Wformat-security warning, the other one discovered today to fix build
against glibc trunk - they have changed getopt related includes there and
we get:
In file included from /usr/include/bits/getopt_posix.h:27:0,
 from /usr/include/unistd.h:872,
 from ../nvptx-ld.c:23:
/usr/include/bits/getopt_core.h:91:12: error: declaration of 'int getopt(int, 
char* const*, const char*) throw ()' has a different exception specifier
 extern int getopt (int ___argc, char *const *___argv, const char *__shortopts)
^~
In file included from ../nvptx-ld.c:22:0:
../include/getopt.h:113:12: note: from previous declaration 'int getopt(int, 
char* const*, const char*)'
 extern int getopt (int argc, char *const *argv, const char *shortopts);
^~

Jakub
--- nvptx-tools/configure.ac
+++ nvptx-tools/configure.ac
@@ -51,6 +51,7 @@ LIBS="$LIBS -lcuda"
 AC_CHECK_FUNCS([[cuGetErrorName] [cuGetErrorString]])
 AC_CHECK_DECLS([[cuGetErrorName], [cuGetErrorString]],
   [], [], [[#include ]])
+AC_CHECK_HEADERS(unistd.h sys/stat.h)
 
 AC_MSG_CHECKING([for extra programs to build requiring -lcuda])
 NVPTX_RUN=
--- nvptx-tools/include/libiberty.h
+++ nvptx-tools/include/libiberty.h
@@ -390,6 +390,17 @@ extern void hex_init (void);
 /* Save files used for communication between processes.  */
 #define PEX_SAVE_TEMPS 0x4
 
+/* Max number of alloca bytes per call before we must switch to malloc.
+
+   ?? Swiped from gnulib's regex_internal.h header.  Is this actually
+   the case?  This number seems arbitrary, though sane.
+
+   The OS usually guarantees only one guard page at the bottom of the stack,
+   and a page size can be as small as 4096 bytes.  So we cannot safely
+   allocate anything larger than 4096 bytes.  Also care for the possibility
+   of a few compiler-allocated temporary stack slots.  */
+#define MAX_ALLOCA_SIZE4032
+
 /* Prepare to execute one or more programs, with standard output of
each program fed to standard input of the next.
FLAGS   As above.
--- nvptx-tools/nvptx-as.c
+++ nvptx-tools/nvptx-as.c
@@ -30,6 +30,9 @@
 #include 
 #include 
 #include 
+#ifdef HAVE_SYS_STAT_H
+#include 
+#endif
 #include 
 #define obstack_chunk_alloc malloc
 #define obstack_chunk_free free
@@ -42,6 +45,38 @@
 
 #include "version.h"
 
+#ifndef R_OK
+#define R_OK 4
+#define W_OK 2
+#define X_OK 1
+#endif
+
+#ifndef DIR_SEPARATOR
+#  define DIR_SEPARATOR '/'
+#endif
+
+#if defined (_WIN32) || defined (__MSDOS__) \
+|| defined (__DJGPP__) || defined (__OS2__)
+#  define HAVE_DOS_BASED_FILE_SYSTEM
+#  define HAVE_HOST_EXECUTABLE_SUFFIX
+#  define HOST_EXECUTABLE_SUFFIX ".exe"
+#  ifndef DIR_SEPARATOR_2 
+#define DIR_SEPARATOR_2 '\\'
+#  endif
+#  define PATH_SEPARATOR ';'
+#else
+#  define PATH_SEPARATOR ':'
+#endif
+
+#ifndef DIR_SEPARATOR_2
+#  define IS_DIR_SEPARATOR(ch) ((ch) == DIR_SEPARATOR)
+#else
+#  define IS_DIR_SEPARATOR(ch) \
+   (((ch) == DIR_SEPARATOR) || ((ch) == DIR_SEPARATOR_2))
+#endif
+
+#define DIR_UP ".."
+
 static const char *outname = NULL;
 
 static void __attribute__ ((format (printf, 1, 2)))
@@ -816,7 +851,7 @@ traverse (void **slot, void *data)
 }
 
 static void
-process (FILE *in, FILE *out)
+process (FILE *in, FILE *out, int verify, const char *outname)
 {
   symbol_table = htab_create (500, hash_string_hash, hash_string_eq,
   NULL);
@@ -824,6 +859,18 @@ process (FILE *in, FILE *out)
   const char *input = read_file (in);
   Token *tok = tokenize (input);
 
+  /* By default, when ptxas is not in PATH, do minimalistic verification,
+ just require that the first non-comment directive is .version.  */
+  if (verify < 0)
+{
+  size_t i;
+  for (i = 0; tok[i].kind == K_comment; i++)
+   ;
+  if (tok[i].kind != K_dotted || !is_keyword (&tok[i], "version"))
+   fatal_error ("missing .version directive at start of file '%s'",
+outname);
+}
+
   do
 tok = parse_file (tok);
   while (tok->kind);
@@ -897,9 +944,83 @@ fork_execute (const char *prog, char *const *argv)
   do_wait (prog, pex);
 }
 
+/* Determine if progname is available in PATH.  */
+static bool
+program_available (const char *progname)
+{
+  char *temp = getenv ("PATH");
+  if (temp)
+{
+  char *startp, *endp, *nstore, *alloc_ptr = NULL;
+  size_t

Re: [gcn][patch] Add -mgpu option and plumb in assembler/linker

2017-05-03 Thread Andrew Stubbs

On 02/05/17 18:08, Martin Jambor wrote:

Hi Andrew,

sorry for replying only now but yesterday was public holiday here and
I am still only in the process of recovering from a long weekend.


No problem, the UK had the same. :-)


While the only objection I have is the C++ style comment in
config/gcn/gcn.c, another problem, for me at least...

On Fri, Apr 28, 2017 at 06:06:39PM +0100, Andrew Stubbs wrote:

This patch, for the "gcn" branch, does three things:

1. Add specs to drive the LLVM assembler and linker. It requires them to be
installed as "as" and "ld", under $target/bin, but then the compiler Just
Works with these specs.


...is that I do not have llvm linker at hand and without it I did not
manage to make the patch produce loadable code.  Because ROCm 1.5 has
been released today, I will update our environment, which is a bit
obsolete, get llvm ld and try again.  This might take me a few days,
so please bear with me for a little more, I would like to make sure it
works on carrizos.


Understood. I do not have a carrizo to test on.

All my testing will be with FuryX Fiji discrete GPUs. The main 
difference, from the software point of view, is that the shared memory 
must be handled a little differently. (The existing HSA back-end appears 
incompatible, as are the samples from the HSA Foundation sources; the 
ROCm samples work fine.) However, the HSACO binary must encode the right 
magic numbers or the driver rejects it, hence the need for this patch.


Andrew


[PATCH] Revert part of PR80492

2017-05-03 Thread Richard Biener

This change unnecessarily pessimizes some cases.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

2017-05-03  Richard Biener  

Revert
PR tree-optimization/80492
* tree-ssa-alias.c (decl_refs_may_alias_p): Handle
compare_base_decls returning dont-know properly.

Index: gcc/tree-ssa-alias.c
===
--- gcc/tree-ssa-alias.c(revision 247492)
+++ gcc/tree-ssa-alias.c(working copy)
@@ -1096,16 +1096,13 @@ decl_refs_may_alias_p (tree ref1, tree b
 {
   gcc_checking_assert (DECL_P (base1) && DECL_P (base2));
 
-  int cmp = compare_base_decls (base1, base2);
-
   /* If both references are based on different variables, they cannot alias.  
*/
-  if (cmp == 0)
+  if (compare_base_decls (base1, base2) == 0)
 return false;
 
   /* If both references are based on the same variable, they cannot alias if
  the accesses do not overlap.  */
-  if (cmp == 1
-  && !ranges_overlap_p (offset1, max_size1, offset2, max_size2))
+  if (!ranges_overlap_p (offset1, max_size1, offset2, max_size2))
 return false;
 
   /* For components with variable position, the above test isn't sufficient,


Re: [PATCH, GCC/ARM, stage4] Set mode for success result of atomic compare and swap

2017-05-03 Thread Thomas Preudhomme

Hi Kyrill,

On 19/04/17 14:34, Kyrill Tkachov wrote:

Hi Thomas,

On 12/04/17 09:59, Thomas Preudhomme wrote:

Hi,

Currently atomic_compare_and_swap_1 define_insn do not have a mode
set for the destination of the set indicating the success result of the
instruction. This is because the operand can be either a CC_Z register
(for 32-bit targets) or a SI register (for 16-bit Thumb targets). This
result in lack of checking for the mode.

This commit use a new CCSI iterator to solve this issue while avoiding
duplication of the patterns. The insn name are kept unique by using
attributes tied to the iterator (SIDI:mode and CCSI:arch) instead of
usign the builtin mode attribute. Expander arm_expand_compare_and_swap
is also adapted accordingly.

ChangeLog entry is as follows:

*** gcc/ChangeLog ***

2017-04-11  Thomas Preud'homme  

* config/arm/iterators.md (CCSI): New mode iterator.
(arch): New mode attribute.
* config/arm/sync.md (atomic_compare_and_swap_1): Rename into ...
(atomic_compare_and_swap_1): This and ...
(atomic_compare_and_swap_1): This.  Use CCSI
code iterator for success result mode.
* config/arm/arm.c (arm_expand_compare_and_swap): Adapt code to use
the corresponding new insn generators.

Testing: arm-none-eabi cross-compiler built successfully for ARMv8-M
Mainline and Baseline without the lack of destination mode warning in
sync.md. Testsuite show no regression.



Thanks for fixing these warnings.
The code looks ok to me but
I'd like to make sure that the rest of the arm atomic targets are not adversely
affected,
so please also do a test run for ARMv7-A and ARMv8-A targets.
Also, a bootstrap is required as always.


Hi Kyrill,

Bootstrapped and ran the testsuite for both ARMv7-A and ARMv8-A in both ARM and 
Thumb mode without any regression. I've also verified that a number of atomic 
related testcases [1][2] get the same code generation for ARMv7-A in ARM and 
Thumb mode as well as ARMv8-M Baseline.


[1] For ARMv7-A ARM and Thumb mode, the following testcases were considered:

gcc/testsuite/gcc.dg/atomic-compare-exchange-1.c
gcc/testsuite/gcc.dg/atomic-compare-exchange-2.c
gcc/testsuite/gcc.dg/atomic-compare-exchange-3.c
gcc/testsuite/gcc.dg/atomic-exchange-1.c
gcc/testsuite/gcc.dg/atomic-exchange-2.c
gcc/testsuite/gcc.dg/atomic-exchange-3.c
gcc/testsuite/gcc.dg/atomic-fence.c
gcc/testsuite/gcc.dg/atomic-flag.c
gcc/testsuite/gcc.dg/atomic-generic.c
gcc/testsuite/gcc.dg/atomic-generic-aux.c
gcc/testsuite/gcc.dg/atomic-invalid-2.c
gcc/testsuite/gcc.dg/atomic-load-1.c
gcc/testsuite/gcc.dg/atomic-load-2.c
gcc/testsuite/gcc.dg/atomic-load-3.c
gcc/testsuite/gcc.dg/atomic-lockfree.c
gcc/testsuite/gcc.dg/atomic-lockfree-aux.c
gcc/testsuite/gcc.dg/atomic-noinline.c
gcc/testsuite/gcc.dg/atomic-noinline-aux.c
gcc/testsuite/gcc.dg/atomic-op-1.c
gcc/testsuite/gcc.dg/atomic-op-2.c
gcc/testsuite/gcc.dg/atomic-op-3.c
gcc/testsuite/gcc.dg/atomic-op-6.c
gcc/testsuite/gcc.dg/atomic-store-1.c
gcc/testsuite/gcc.dg/atomic-store-2.c
gcc/testsuite/gcc.dg/atomic-store-3.c
gcc/testsuite/g++.dg/ext/atomic-1.C
gcc/testsuite/g++.dg/ext/atomic-2.C
gcc/testsuite/gcc.target/arm/atomic-comp-swap-release-acquire-1.c
gcc/testsuite/gcc.target/arm/atomic-op-acq_rel-1.c
gcc/testsuite/gcc.target/arm/atomic-op-acquire-1.c
gcc/testsuite/gcc.target/arm/atomic-op-char-1.c
gcc/testsuite/gcc.target/arm/atomic-op-consume-1.c
gcc/testsuite/gcc.target/arm/atomic-op-int-1.c
gcc/testsuite/gcc.target/arm/atomic-op-relaxed-1.c
gcc/testsuite/gcc.target/arm/atomic-op-release-1.c
gcc/testsuite/gcc.target/arm/atomic-op-seq_cst-1.c
gcc/testsuite/gcc.target/arm/atomic-op-short-1.c
gcc/testsuite/gcc.target/arm/atomic_loaddi_1.c
gcc/testsuite/gcc.target/arm/atomic_loaddi_4.c
gcc/testsuite/gcc.target/arm/atomic_loaddi_7.c
gcc/testsuite/gcc.target/arm/sync-1.c
gcc/testsuite/gcc.target/arm/synchronize.c
gcc/testsuite/gcc.target/arm/armv8-sync-comp-swap.c
gcc/testsuite/gcc.target/arm/armv8-sync-op-acquire.c
gcc/testsuite/gcc.target/arm/armv8-sync-op-full.c
gcc/testsuite/gcc.target/arm/armv8-sync-op-release.c
libstdc++-v3/testsuite/29_atomics/atomic/60658.cc
libstdc++-v3/testsuite/29_atomics/atomic/62259.cc
libstdc++-v3/testsuite/29_atomics/atomic/64658.cc
libstdc++-v3/testsuite/29_atomics/atomic/65147.cc
libstdc++-v3/testsuite/29_atomics/atomic/65913.cc
libstdc++-v3/testsuite/29_atomics/atomic/70766.cc
libstdc++-v3/testsuite/29_atomics/atomic/cons/49445.cc
libstdc++-v3/testsuite/29_atomics/atomic/cons/constexpr.cc
libstdc++-v3/testsuite/29_atomics/atomic/cons/copy_list.cc
libstdc++-v3/testsuite/29_atomics/atomic/cons/default.cc
libstdc++-v3/testsuite/29_atomics/atomic/cons/direct_list.cc
libstdc++-v3/testsuite/29_atomics/atomic/cons/single_value.cc
libstdc++-v3/testsuite/29_atomics/atomic/cons/user_pod.cc
libstdc++-v3/testsuite/29_atomics/atomic/operators/51811.cc
libstdc++-v3/testsuite/29_atomics/atomic/operators/56011.cc
libstdc++-v3/testsuite/29_atomics/atomic/operators/integral_assignment.cc
libstdc++-v3/testsuite/29_a

Re: [PATCH, GCC/ARM, stage4] Set mode for success result of atomic compare and swap

2017-05-03 Thread Kyrill Tkachov

Hi Thomas,

On 03/05/17 10:39, Thomas Preudhomme wrote:

Hi Kyrill,

On 19/04/17 14:34, Kyrill Tkachov wrote:

Hi Thomas,

On 12/04/17 09:59, Thomas Preudhomme wrote:

Hi,

Currently atomic_compare_and_swap_1 define_insn do not have a mode
set for the destination of the set indicating the success result of the
instruction. This is because the operand can be either a CC_Z register
(for 32-bit targets) or a SI register (for 16-bit Thumb targets). This
result in lack of checking for the mode.

This commit use a new CCSI iterator to solve this issue while avoiding
duplication of the patterns. The insn name are kept unique by using
attributes tied to the iterator (SIDI:mode and CCSI:arch) instead of
usign the builtin mode attribute. Expander arm_expand_compare_and_swap
is also adapted accordingly.

ChangeLog entry is as follows:

*** gcc/ChangeLog ***

2017-04-11  Thomas Preud'homme 

* config/arm/iterators.md (CCSI): New mode iterator.
(arch): New mode attribute.
* config/arm/sync.md (atomic_compare_and_swap_1): Rename into ...
(atomic_compare_and_swap_1): This and ...
(atomic_compare_and_swap_1): This.  Use CCSI
code iterator for success result mode.
* config/arm/arm.c (arm_expand_compare_and_swap): Adapt code to use
the corresponding new insn generators.

Testing: arm-none-eabi cross-compiler built successfully for ARMv8-M
Mainline and Baseline without the lack of destination mode warning in
sync.md. Testsuite show no regression.



Thanks for fixing these warnings.
The code looks ok to me but
I'd like to make sure that the rest of the arm atomic targets are not adversely
affected,
so please also do a test run for ARMv7-A and ARMv8-A targets.
Also, a bootstrap is required as always.


Hi Kyrill,

Bootstrapped and ran the testsuite for both ARMv7-A and ARMv8-A in both ARM and Thumb mode without any regression. I've also verified that a number of atomic related testcases [1][2] get the same code generation for ARMv7-A in ARM and 
Thumb mode as well as ARMv8-M Baseline.


[1] For ARMv7-A ARM and Thumb mode, the following testcases were considered:

gcc/testsuite/gcc.dg/atomic-compare-exchange-1.c
gcc/testsuite/gcc.dg/atomic-compare-exchange-2.c
gcc/testsuite/gcc.dg/atomic-compare-exchange-3.c
gcc/testsuite/gcc.dg/atomic-exchange-1.c
gcc/testsuite/gcc.dg/atomic-exchange-2.c
gcc/testsuite/gcc.dg/atomic-exchange-3.c
gcc/testsuite/gcc.dg/atomic-fence.c
gcc/testsuite/gcc.dg/atomic-flag.c
gcc/testsuite/gcc.dg/atomic-generic.c
gcc/testsuite/gcc.dg/atomic-generic-aux.c
gcc/testsuite/gcc.dg/atomic-invalid-2.c
gcc/testsuite/gcc.dg/atomic-load-1.c
gcc/testsuite/gcc.dg/atomic-load-2.c
gcc/testsuite/gcc.dg/atomic-load-3.c
gcc/testsuite/gcc.dg/atomic-lockfree.c
gcc/testsuite/gcc.dg/atomic-lockfree-aux.c
gcc/testsuite/gcc.dg/atomic-noinline.c
gcc/testsuite/gcc.dg/atomic-noinline-aux.c
gcc/testsuite/gcc.dg/atomic-op-1.c
gcc/testsuite/gcc.dg/atomic-op-2.c
gcc/testsuite/gcc.dg/atomic-op-3.c
gcc/testsuite/gcc.dg/atomic-op-6.c
gcc/testsuite/gcc.dg/atomic-store-1.c
gcc/testsuite/gcc.dg/atomic-store-2.c
gcc/testsuite/gcc.dg/atomic-store-3.c
gcc/testsuite/g++.dg/ext/atomic-1.C
gcc/testsuite/g++.dg/ext/atomic-2.C
gcc/testsuite/gcc.target/arm/atomic-comp-swap-release-acquire-1.c
gcc/testsuite/gcc.target/arm/atomic-op-acq_rel-1.c
gcc/testsuite/gcc.target/arm/atomic-op-acquire-1.c
gcc/testsuite/gcc.target/arm/atomic-op-char-1.c
gcc/testsuite/gcc.target/arm/atomic-op-consume-1.c
gcc/testsuite/gcc.target/arm/atomic-op-int-1.c
gcc/testsuite/gcc.target/arm/atomic-op-relaxed-1.c
gcc/testsuite/gcc.target/arm/atomic-op-release-1.c
gcc/testsuite/gcc.target/arm/atomic-op-seq_cst-1.c
gcc/testsuite/gcc.target/arm/atomic-op-short-1.c
gcc/testsuite/gcc.target/arm/atomic_loaddi_1.c
gcc/testsuite/gcc.target/arm/atomic_loaddi_4.c
gcc/testsuite/gcc.target/arm/atomic_loaddi_7.c
gcc/testsuite/gcc.target/arm/sync-1.c
gcc/testsuite/gcc.target/arm/synchronize.c
gcc/testsuite/gcc.target/arm/armv8-sync-comp-swap.c
gcc/testsuite/gcc.target/arm/armv8-sync-op-acquire.c
gcc/testsuite/gcc.target/arm/armv8-sync-op-full.c
gcc/testsuite/gcc.target/arm/armv8-sync-op-release.c
libstdc++-v3/testsuite/29_atomics/atomic/60658.cc
libstdc++-v3/testsuite/29_atomics/atomic/62259.cc
libstdc++-v3/testsuite/29_atomics/atomic/64658.cc
libstdc++-v3/testsuite/29_atomics/atomic/65147.cc
libstdc++-v3/testsuite/29_atomics/atomic/65913.cc
libstdc++-v3/testsuite/29_atomics/atomic/70766.cc
libstdc++-v3/testsuite/29_atomics/atomic/cons/49445.cc
libstdc++-v3/testsuite/29_atomics/atomic/cons/constexpr.cc
libstdc++-v3/testsuite/29_atomics/atomic/cons/copy_list.cc
libstdc++-v3/testsuite/29_atomics/atomic/cons/default.cc
libstdc++-v3/testsuite/29_atomics/atomic/cons/direct_list.cc
libstdc++-v3/testsuite/29_atomics/atomic/cons/single_value.cc
libstdc++-v3/testsuite/29_atomics/atomic/cons/user_pod.cc
libstdc++-v3/testsuite/29_atomics/atomic/operators/51811.cc
libstdc++-v3/testsuite/29_atomics/atomic/operators/56011.cc
libstdc++-v3/testsuite/29_atomics/atomic/operators/inte

[PATCH] Doxygen: add default location for filters and output folder.

2017-05-03 Thread Martin Liška
On 04/28/2017 02:03 PM, Martin Liška wrote:
> Why do we not make a default for OUTPUT_DIRECTORY and INPUT_FILTER ?
> I would expect people are running doxygen from GCC root folder.

Hi.

I'm suggesting following patch for that.

Martin
>From 4623de7cf43598de0f778dc976a3b219220716e3 Mon Sep 17 00:00:00 2001
From: marxin 
Date: Wed, 3 May 2017 11:42:41 +0200
Subject: [PATCH] Doxygen: add default location for filters and output folder.

contrib/ChangeLog:

2017-05-03  Martin Liska  

	* gcc.doxy: Add default location for filters and output folder.
---
 contrib/gcc.doxy | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/contrib/gcc.doxy b/contrib/gcc.doxy
index 7a284e754aa..a8eeb03c9a0 100644
--- a/contrib/gcc.doxy
+++ b/contrib/gcc.doxy
@@ -11,16 +11,12 @@
 # Values that contain spaces should be placed between quotes (" ")
 
 
-#-
-# NOTE: YOU MUST EDIT THE FOLLOWING HARDWIRED PATHS BEFORE USING THIS FILE.
-#-
-
 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) 
 # base path where the generated documentation will be put. 
 # If a relative path is entered, it will be relative to the location 
 # where doxygen was started. If left blank the current directory will be used.
 
-OUTPUT_DIRECTORY   = @OUTPUT_DIRECTORY@
+OUTPUT_DIRECTORY   = gcc-doxygen
 
 # The INPUT_FILTER tag can be used to specify a program that doxygen should 
 # invoke to filter for each input file. Doxygen will invoke the filter program 
@@ -30,7 +26,7 @@ OUTPUT_DIRECTORY   = @OUTPUT_DIRECTORY@
 # to standard output.  If FILTER_PATTERNS is specified, this tag will be 
 # ignored.
 
-INPUT_FILTER   = @INPUT_FILTER@
+INPUT_FILTER   = contrib/filter_gcc_for_doxygen
 
 #-
 
-- 
2.12.2



Re: [PATCH GCC8][07/33]Offset validity check in address expression

2017-05-03 Thread Richard Biener
On Tue, May 2, 2017 at 7:06 PM, Bin.Cheng  wrote:
> On Mon, Apr 24, 2017 at 11:34 AM, Richard Biener
>  wrote:
>> On Tue, Apr 18, 2017 at 12:41 PM, Bin Cheng  wrote:
>>> Hi,
>>> For now, we check validity of offset by computing the maximum offset then 
>>> checking if
>>> offset is smaller than the max offset.  This is inaccurate, for example, 
>>> some targets
>>> may require offset to be aligned by power of 2.  This patch introduces new 
>>> interface
>>> checking validity of offset.  It also buffers rtx among different calls.
>>>
>>> Is it OK?
>>
>> -  static vec max_offset_list;
>> -
>> +  auto_vec addr_list;
>>as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
>>mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
>>
>> -  num = max_offset_list.length ();
>> +  num = addr_list.length ();
>>list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
>>if (list_index >= num)
>>
>> num here is always zero and thus the compare is always true.
>>
>> +  addr_list.safe_grow_cleared (list_index + MAX_MACHINE_MODE);
>> +  for (; num < addr_list.length (); num++)
>> +   addr_list[num] = NULL;
>>
>> the loop is now redundant (safe_grow_cleared)
>>
>> +  addr = addr_list[list_index];
>> +  if (!addr)
>>  {
>>
>> always true again...
>>
>> I wonder if you really indented to drop 'static' from addr_list?
>> There's no caching
>> across function calls.
> Right, the redundancy is because I tried to cache across function
> calls with declarations like:
>   static unsigned num = 0;
>   static GTY ((skip)) rtx *addr_list = NULL;
> But this doesn't work, the addr_list[list_index] still gets corrupted somehow.

Well, you need GTY (()), not GTY((skip)) on them.  Not sure if it works
for function-scope decls, you have to check.  Look at whether a GC
root is created for the variable in gt-tree-ssa-loop-ivopts.h (need to tweak
GTFILES in the makefile plus include that generated file).  tree-ssa-address.c
uses a global root for mem_addr_template_list for example.

Richard.



> Thanks,
> bin
>>
>> + /* Split group if aksed to, or the offset against the first
>> +use can't fit in offset part of addressing mode.  IV uses
>> +having the same offset are still kept in one group.  */
>> + if (offset != 0 &&
>> + (split_p || !addr_offset_valid_p (use, offset)))
>>
>> && goes to the next line.
>>
>> Richard.
>>
>>
>>
>>> Thanks,
>>> bin
>>> 2017-04-11  Bin Cheng  
>>>
>>> * tree-ssa-loop-ivopts.c (compute_max_addr_offset): Delete.
>>> (addr_offset_valid_p): New function.
>>> (split_address_groups): Check offset validity with above function.


Re: [PATCH] ggc-page loop

2017-05-03 Thread Richard Biener
On Wed, May 3, 2017 at 2:09 AM, Andrew Pinski  wrote:
> On Tue, May 2, 2017 at 3:41 PM, Nathan Sidwell  wrote:
>> This loop in ggc-page confused me, because the iterator is one greater than
>> the indexing value.  Also the formatting of the array indexing is incorrect.
>>
>> Fixed thusly, and applied as obvious after booting on x86_64-linux-gnu
>
> -  for (i = G.by_depth_in_use; i > 0; --i)
> +  for (unsigned i = G.by_depth_in_use; i--;)
>  {
> -  page_entry *p = G.by_depth[i-1];
> -  p->index_by_depth = i-1;
> +  page_entry *p = G.by_depth[i];
> +  p->index_by_depth = i;
>  }
>
> I think this is still incorrect.  you replaced i - 1 by i but did not
> start at G.by_depth_in_use - 1;

He does, albeit in a coding way I dislike (watch how the iterator
update is done in the condition!)

Richard.

>
> Thanks,
> Andrew
>
>
>>
>> nathan
>> --
>> Nathan Sidwell


Re: [PATCH GCC8][01/33]Handle TRUNCATE between tieable modes in rtx_cost

2017-05-03 Thread Bin.Cheng
On Wed, May 3, 2017 at 9:38 AM, Bin.Cheng  wrote:
> On Wed, May 3, 2017 at 7:17 AM, Eric Botcazou  wrote:
>>> 2017-04-11  Bin Cheng  
>>>
>>>   * rtlanal.c (rtx_cost): Handle TRUNCATE between tieable modes.
>>
>> This breaks bootstrap with RTL checking:
>>
>> /home/eric/build/gcc/native/./gcc/xgcc -B/home/eric/build/gcc/native/./gcc/ -
>> nostdinc -x c /dev/null -S -o /dev/null -fself-
>> test=/home/eric/svn/gcc/gcc/testsuite/selftests
>> cc1: internal compiler error: RTL check: expected code 'subreg', have
>> 'truncate' in rtx_cost, at rtlanal.c:4169
>> 0xbae338 rtl_check_failed_code1(rtx_def const*, rtx_code, char const*, int,
>> char const*)
>> /home/eric/svn/gcc/gcc/rtl.c:829
>> 0xbbc9b4 rtx_cost(rtx_def*, machine_mode, rtx_code, int, bool)
>> /home/eric/svn/gcc/gcc/rtlanal.c:4169
>> 0x8517e6 set_src_cost
>> /home/eric/svn/gcc/gcc/rtl.h:2685
>> 0x8517e6 init_expmed_one_conv
>> /home/eric/svn/gcc/gcc/expmed.c:142
>> 0x8517e6 init_expmed_one_mode
>> /home/eric/svn/gcc/gcc/expmed.c:209
>> 0x853fb2 init_expmed()
>> /home/eric/svn/gcc/gcc/expmed.c:270
>> 0xc45974 backend_init_target
>> /home/eric/svn/gcc/gcc/toplev.c:1665
>> 0xc45974 initialize_rtl()
>>
> Sorry for disturbing, I will revert this if can't fix today.
It looks bogus and I couldn't find the motivating case for it, so
revert with attached patch.  Build on x86 and commit as obvious.

Thanks,
bin
2017-05-03  Bin Cheng  

Revert
2017-05-02  Bin Cheng  
* rtlanal.c (rtx_cost): Handle TRUNCATE between tieable modes.
diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index f18245f..321363f 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -4164,14 +4164,6 @@ rtx_cost (rtx x, machine_mode mode, enum rtx_code 
outer_code,
return COSTS_N_INSNS (2 + factor);
   break;
 
-case TRUNCATE:
-  /* If we can tie these modes, make this cheap.  */
-  if (MODES_TIEABLE_P (mode, GET_MODE (SUBREG_REG (x
-   {
- total = 0;
- break;
-   }
-  /* FALLTHRU */
 default:
   if (targetm.rtx_costs (x, mode, outer_code, opno, &total, speed))
return total;


Re: Alternative check for vector refs with same alignment

2017-05-03 Thread Richard Biener
On Wed, May 3, 2017 at 9:54 AM, Richard Sandiford
 wrote:
> vect_find_same_alignment_drs uses the ddr dependence distance
> to tell whether two references have the same alignment.  Although
> that's safe with the current code, there's no particular reason
> why a dependence distance of 0 should mean that the accesses start
> on the same byte.  E.g. a reference to a full complex value could
> in principle depend on a reference to the imaginary component.
> A later patch adds support for this kind of dependence.
>
> On the other side, checking modulo vf is pessimistic when the step
> divided by the element size is a factor of 2.
>
> This patch instead looks for cases in which the drs have the same
> base, offset and step, and for which the difference in their constant
> initial values is a multiple of the alignment.

I'm a bit wary about trusting operand_equal_p over dependence analysis.
So, did you (can you) add an assert that the new code computes
same alignment in all cases where the old one did?

> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Otherwise looks ok.

Thanks,
Richard.

> Thanks,
> Richard
>
>
> gcc/
> 2017-05-03  Richard Sandiford  
>
> * tree-vect-data-refs.c (vect_find_same_alignment_drs): Remove
> loop_vinfo argument and use of dependence distance vectors.
> Check instead whether the two references differ only in their
> initial value and assume that they have the same alignment if the
> difference is a multiple of the vector alignment.
> (vect_analyze_data_refs_alignment): Update call accordingly.
>
> gcc/testsuite/
> * gcc.dg/vect/vect-103.c: Update wording of dump message.
>
> Index: gcc/tree-vect-data-refs.c
> ===
> --- gcc/tree-vect-data-refs.c   2017-04-18 19:52:35.060164268 +0100
> +++ gcc/tree-vect-data-refs.c   2017-05-03 08:48:30.536704993 +0100
> @@ -2042,20 +2042,12 @@ vect_enhance_data_refs_alignment (loop_v
> vectorization factor.  */
>
>  static void
> -vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
> - loop_vec_info loop_vinfo)
> +vect_find_same_alignment_drs (struct data_dependence_relation *ddr)
>  {
> -  unsigned int i;
> -  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
> -  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
>struct data_reference *dra = DDR_A (ddr);
>struct data_reference *drb = DDR_B (ddr);
>stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
>stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
> -  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra;
> -  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb;
> -  lambda_vector dist_v;
> -  unsigned int loop_depth;
>
>if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
>  return;
> @@ -2063,48 +2055,37 @@ vect_find_same_alignment_drs (struct dat
>if (dra == drb)
>  return;
>
> -  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
> -return;
> -
> -  /* Loop-based vectorization and known data dependence.  */
> -  if (DDR_NUM_DIST_VECTS (ddr) == 0)
> -return;
> -
> -  /* Data-dependence analysis reports a distance vector of zero
> - for data-references that overlap only in the first iteration
> - but have different sign step (see PR45764).
> - So as a sanity check require equal DR_STEP.  */
> -  if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
> +  if (!operand_equal_p (DR_BASE_OBJECT (dra), DR_BASE_OBJECT (drb),
> +   OEP_ADDRESS_OF)
> +  || !operand_equal_p (DR_OFFSET (dra), DR_OFFSET (drb), 0)
> +  || !operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
>  return;
>
> -  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
> -  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
> +  /* Two references with distance zero have the same alignment.  */
> +  offset_int diff = (wi::to_offset (DR_INIT (dra))
> +- wi::to_offset (DR_INIT (drb)));
> +  if (diff != 0)
>  {
> -  int dist = dist_v[loop_depth];
> +  /* Get the wider of the two alignments.  */
> +  unsigned int align_a = TYPE_ALIGN_UNIT (STMT_VINFO_VECTYPE 
> (stmtinfo_a));
> +  unsigned int align_b = TYPE_ALIGN_UNIT (STMT_VINFO_VECTYPE 
> (stmtinfo_b));
> +  unsigned int max_align = MAX (align_a, align_b);
> +
> +  /* Require the gap to be a multiple of the larger vector alignment.  */
> +  if (!wi::multiple_of_p (diff, max_align, SIGNED))
> +   return;
> +}
>
> -  if (dump_enabled_p ())
> -   dump_printf_loc (MSG_NOTE, vect_location,
> -"dependence distance  = %d.\n", dist);
> -
> -  /* Same loop iteration.  */
> -  if (dist == 0
> - || (dist % vectorization_factor == 0 && dra_size == drb_size))
> -   {
> - /* Two references with distance zero have the same alignment.  */
> - STMT

Re: [PATCH GCC8][01/33]Handle TRUNCATE between tieable modes in rtx_cost

2017-05-03 Thread Kyrill Tkachov

Hi Bin,

On 03/05/17 11:02, Bin.Cheng wrote:

On Wed, May 3, 2017 at 9:38 AM, Bin.Cheng  wrote:

On Wed, May 3, 2017 at 7:17 AM, Eric Botcazou  wrote:

2017-04-11  Bin Cheng  

   * rtlanal.c (rtx_cost): Handle TRUNCATE between tieable modes.

This breaks bootstrap with RTL checking:

/home/eric/build/gcc/native/./gcc/xgcc -B/home/eric/build/gcc/native/./gcc/ -
nostdinc -x c /dev/null -S -o /dev/null -fself-
test=/home/eric/svn/gcc/gcc/testsuite/selftests
cc1: internal compiler error: RTL check: expected code 'subreg', have
'truncate' in rtx_cost, at rtlanal.c:4169
0xbae338 rtl_check_failed_code1(rtx_def const*, rtx_code, char const*, int,
char const*)
 /home/eric/svn/gcc/gcc/rtl.c:829
0xbbc9b4 rtx_cost(rtx_def*, machine_mode, rtx_code, int, bool)
 /home/eric/svn/gcc/gcc/rtlanal.c:4169
0x8517e6 set_src_cost
 /home/eric/svn/gcc/gcc/rtl.h:2685
0x8517e6 init_expmed_one_conv
 /home/eric/svn/gcc/gcc/expmed.c:142
0x8517e6 init_expmed_one_mode
 /home/eric/svn/gcc/gcc/expmed.c:209
0x853fb2 init_expmed()
 /home/eric/svn/gcc/gcc/expmed.c:270
0xc45974 backend_init_target
 /home/eric/svn/gcc/gcc/toplev.c:1665
0xc45974 initialize_rtl()


Sorry for disturbing, I will revert this if can't fix today.

It looks bogus and I couldn't find the motivating case for it, so
revert with attached patch.  Build on x86 and commit as obvious.

Thanks,
bin
2017-05-03  Bin Cheng  

 Revert
 2017-05-02  Bin Cheng  
 * rtlanal.c (rtx_cost): Handle TRUNCATE between tieable modes.


Looking at the code in the patch...

+case TRUNCATE:
+  /* If we can tie these modes, make this cheap.  */
+  if (MODES_TIEABLE_P (mode, GET_MODE (SUBREG_REG (x

'code' here is GET_CODE (x) and in this case it is TRUNCATE.
SUBREG_REG asserts (in RTL checking mode) that its argument is a SUBREG, so 
passing it a TRUNCATE rtx would cause
the checking failure Eric reported. I think you meant to use XEXP (x, 0)  
instead of SUBREG_REG (x) ?

Thanks,
Kyrill



Re: [PATCH GCC8][01/33]Handle TRUNCATE between tieable modes in rtx_cost

2017-05-03 Thread Bin.Cheng
On Wed, May 3, 2017 at 11:09 AM, Kyrill Tkachov
 wrote:
> Hi Bin,
>
>
> On 03/05/17 11:02, Bin.Cheng wrote:
>>
>> On Wed, May 3, 2017 at 9:38 AM, Bin.Cheng  wrote:
>>>
>>> On Wed, May 3, 2017 at 7:17 AM, Eric Botcazou 
>>> wrote:
>
> 2017-04-11  Bin Cheng  
>
>* rtlanal.c (rtx_cost): Handle TRUNCATE between tieable modes.

 This breaks bootstrap with RTL checking:

 /home/eric/build/gcc/native/./gcc/xgcc
 -B/home/eric/build/gcc/native/./gcc/ -
 nostdinc -x c /dev/null -S -o /dev/null -fself-
 test=/home/eric/svn/gcc/gcc/testsuite/selftests
 cc1: internal compiler error: RTL check: expected code 'subreg', have
 'truncate' in rtx_cost, at rtlanal.c:4169
 0xbae338 rtl_check_failed_code1(rtx_def const*, rtx_code, char const*,
 int,
 char const*)
  /home/eric/svn/gcc/gcc/rtl.c:829
 0xbbc9b4 rtx_cost(rtx_def*, machine_mode, rtx_code, int, bool)
  /home/eric/svn/gcc/gcc/rtlanal.c:4169
 0x8517e6 set_src_cost
  /home/eric/svn/gcc/gcc/rtl.h:2685
 0x8517e6 init_expmed_one_conv
  /home/eric/svn/gcc/gcc/expmed.c:142
 0x8517e6 init_expmed_one_mode
  /home/eric/svn/gcc/gcc/expmed.c:209
 0x853fb2 init_expmed()
  /home/eric/svn/gcc/gcc/expmed.c:270
 0xc45974 backend_init_target
  /home/eric/svn/gcc/gcc/toplev.c:1665
 0xc45974 initialize_rtl()

>>> Sorry for disturbing, I will revert this if can't fix today.
>>
>> It looks bogus and I couldn't find the motivating case for it, so
>> revert with attached patch.  Build on x86 and commit as obvious.
>>
>> Thanks,
>> bin
>> 2017-05-03  Bin Cheng  
>>
>>  Revert
>>  2017-05-02  Bin Cheng  
>>  * rtlanal.c (rtx_cost): Handle TRUNCATE between tieable modes.
>
>
> Looking at the code in the patch...
>
> +case TRUNCATE:
> +  /* If we can tie these modes, make this cheap.  */
> +  if (MODES_TIEABLE_P (mode, GET_MODE (SUBREG_REG (x
>
> 'code' here is GET_CODE (x) and in this case it is TRUNCATE.
> SUBREG_REG asserts (in RTL checking mode) that its argument is a SUBREG, so
> passing it a TRUNCATE rtx would cause
> the checking failure Eric reported. I think you meant to use XEXP (x, 0)
> instead of SUBREG_REG (x) ?
Yes, I guess so.  Reverted since I couldn't find the original test.

Thanks,
bin
>
> Thanks,
> Kyrill
>


Re: [RFC][PATCH] Introduce -fdump*-folding

2017-05-03 Thread Richard Biener
On Wed, May 3, 2017 at 10:10 AM, Martin Liška  wrote:
> Hello
>
> Last release cycle I spent quite some time with reading of IVOPTS pass
> dump file. Using -fdump*-details causes to generate a lot of 'Applying 
> pattern'
> lines, which can make reading of a dump file more complicated.
>
> There are stats for tramp3d with -O2 and -fdump-tree-all-details. Percentage 
> number
> shows how many lines are of the aforementioned pattern:
>
> tramp3d-v4.cpp.164t.ivopts: 6.34%
>   tramp3d-v4.cpp.091t.ccp2: 5.04%
>   tramp3d-v4.cpp.093t.cunrolli: 4.41%
>   tramp3d-v4.cpp.129t.laddress: 3.70%
>   tramp3d-v4.cpp.032t.ccp1: 2.31%
>   tramp3d-v4.cpp.038t.evrp: 1.90%
>  tramp3d-v4.cpp.033t.forwprop1: 1.74%
>   tramp3d-v4.cpp.103t.vrp1: 1.52%
>  tramp3d-v4.cpp.124t.forwprop3: 1.31%
>   tramp3d-v4.cpp.181t.vrp2: 1.30%
>tramp3d-v4.cpp.161t.cunroll: 1.22%
> tramp3d-v4.cpp.027t.fixup_cfg3: 1.11%
>tramp3d-v4.cpp.153t.ivcanon: 1.07%
>   tramp3d-v4.cpp.126t.ccp3: 0.96%
>   tramp3d-v4.cpp.143t.sccp: 0.91%
>  tramp3d-v4.cpp.185t.forwprop4: 0.82%
>tramp3d-v4.cpp.011t.cfg: 0.74%
>  tramp3d-v4.cpp.096t.forwprop2: 0.50%
> tramp3d-v4.cpp.019t.fixup_cfg1: 0.37%
>  tramp3d-v4.cpp.120t.phicprop1: 0.33%
>tramp3d-v4.cpp.133t.pre: 0.32%
>  tramp3d-v4.cpp.182t.phicprop2: 0.27%
> tramp3d-v4.cpp.170t.veclower21: 0.25%
>tramp3d-v4.cpp.029t.einline: 0.24%
>
> I'm suggesting to add new TDF that will be allocated for that.
> Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
>
> Thoughts?

Ok.  Soon we'll want to change dump_flags to uint64_t ...  (we have 1 bit left
if you allow negative dump_flags).  It'll tickle down on a lot of interfaces
so introducing dump_flags_t at the same time might be a good idea.

Thanks,
Richard.

> Martin


Re: Alternative check for vector refs with same alignment

2017-05-03 Thread Bin.Cheng
On Wed, May 3, 2017 at 11:07 AM, Richard Biener
 wrote:
> On Wed, May 3, 2017 at 9:54 AM, Richard Sandiford
>  wrote:
>> vect_find_same_alignment_drs uses the ddr dependence distance
>> to tell whether two references have the same alignment.  Although
>> that's safe with the current code, there's no particular reason
>> why a dependence distance of 0 should mean that the accesses start
>> on the same byte.  E.g. a reference to a full complex value could
>> in principle depend on a reference to the imaginary component.
>> A later patch adds support for this kind of dependence.
>>
>> On the other side, checking modulo vf is pessimistic when the step
>> divided by the element size is a factor of 2.
>>
>> This patch instead looks for cases in which the drs have the same
>> base, offset and step, and for which the difference in their constant
>> initial values is a multiple of the alignment.
>
> I'm a bit wary about trusting operand_equal_p over dependence analysis.
> So, did you (can you) add an assert that the new code computes
> same alignment in all cases where the old one did?
At the moment operand_equal_p method looks more powerful than
dependence analysis, for example, it can handle the same memory
reference with pointer/array_ref forms like in PR65206.  However,
given dependence check is not expensive here, is it possible to build
the patch on top of it when it fails?

Thanks,
bin
>
>> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?
>
> Otherwise looks ok.
>
> Thanks,
> Richard.
>
>> Thanks,
>> Richard
>>
>>
>> gcc/
>> 2017-05-03  Richard Sandiford  
>>
>> * tree-vect-data-refs.c (vect_find_same_alignment_drs): Remove
>> loop_vinfo argument and use of dependence distance vectors.
>> Check instead whether the two references differ only in their
>> initial value and assume that they have the same alignment if the
>> difference is a multiple of the vector alignment.
>> (vect_analyze_data_refs_alignment): Update call accordingly.
>>
>> gcc/testsuite/
>> * gcc.dg/vect/vect-103.c: Update wording of dump message.
>>
>> Index: gcc/tree-vect-data-refs.c
>> ===
>> --- gcc/tree-vect-data-refs.c   2017-04-18 19:52:35.060164268 +0100
>> +++ gcc/tree-vect-data-refs.c   2017-05-03 08:48:30.536704993 +0100
>> @@ -2042,20 +2042,12 @@ vect_enhance_data_refs_alignment (loop_v
>> vectorization factor.  */
>>
>>  static void
>> -vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
>> - loop_vec_info loop_vinfo)
>> +vect_find_same_alignment_drs (struct data_dependence_relation *ddr)
>>  {
>> -  unsigned int i;
>> -  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
>> -  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
>>struct data_reference *dra = DDR_A (ddr);
>>struct data_reference *drb = DDR_B (ddr);
>>stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
>>stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
>> -  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra;
>> -  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb;
>> -  lambda_vector dist_v;
>> -  unsigned int loop_depth;
>>
>>if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
>>  return;
>> @@ -2063,48 +2055,37 @@ vect_find_same_alignment_drs (struct dat
>>if (dra == drb)
>>  return;
>>
>> -  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
>> -return;
>> -
>> -  /* Loop-based vectorization and known data dependence.  */
>> -  if (DDR_NUM_DIST_VECTS (ddr) == 0)
>> -return;
>> -
>> -  /* Data-dependence analysis reports a distance vector of zero
>> - for data-references that overlap only in the first iteration
>> - but have different sign step (see PR45764).
>> - So as a sanity check require equal DR_STEP.  */
>> -  if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
>> +  if (!operand_equal_p (DR_BASE_OBJECT (dra), DR_BASE_OBJECT (drb),
>> +   OEP_ADDRESS_OF)
>> +  || !operand_equal_p (DR_OFFSET (dra), DR_OFFSET (drb), 0)
>> +  || !operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
>>  return;
>>
>> -  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
>> -  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
>> +  /* Two references with distance zero have the same alignment.  */
>> +  offset_int diff = (wi::to_offset (DR_INIT (dra))
>> +- wi::to_offset (DR_INIT (drb)));
>> +  if (diff != 0)
>>  {
>> -  int dist = dist_v[loop_depth];
>> +  /* Get the wider of the two alignments.  */
>> +  unsigned int align_a = TYPE_ALIGN_UNIT (STMT_VINFO_VECTYPE 
>> (stmtinfo_a));
>> +  unsigned int align_b = TYPE_ALIGN_UNIT (STMT_VINFO_VECTYPE 
>> (stmtinfo_b));
>> +  unsigned int max_align = MAX (align_a, align_b);
>> +
>> +  /* Require the gap to be a multiple of the larger vector alignment.  

Re: [PATCH] handle sprintf(d, "%s", ...) in gimple-ssa-sprintf.c

2017-05-03 Thread Richard Biener
On Tue, May 2, 2017 at 4:41 PM, Martin Sebor  wrote:
 FWIW, my fix for bug 79062 is only partial (it gets the pass
 to run but the warnings are still not issued).  I don't quite
 understand what prevents the warning flag(s) from getting set
 when -flto is used.  This seems to be a bigger problem than
 just the sprintf pass not doing something just right.
>>>
>>>
>>> I've never dug deeply in the LTO stuff, but I believe we stream the
>>> compiler
>>> flags, so it could be something there.
>>
>>
>> We do.
>>
>>> Alternately you might be running into a case where in LTO mode we
>>> recreate
>>> base types.  Look for a type equality tester that goes beyond just
>>> testing
>>> pointer equality.
>>>
>>> ie, in LTO I think we'll create a type based on the streamed data, but I
>>> also think we'll create various basic types.  Thus in LTO mode pointer
>>> equality may not be sufficient.
>>
>>
>> We make sure that for most basic types we end up re-using them where
>> possible.
>> char_type_node is an example where that generally doesn't work because
>> it's
>> value depends on a command-line flag.
>
>
> That answers the first part of the question of why the sprintf
> pass wouldn't run (or do anything) with -flto.   With it fixed
> (as in fold-const.c or tree-ssa-strlen.c as you suggested in
> bug 79602) it runs and the optimization does its job, but no
> warnings are issued.  The wan_foo_flags for warnings that are
> enabled implicitly (e.g., by -Wall or -Wextra on the command
> line) are clear.  There seem to be dependencies between warnings
> in c.opt that ignore LTO (as a language), but even with those
> corrected (i.e., with LTO added as a language to -Wformat and
> -Wall) the flags are still clear when LTO runs.  Does that ring
> any bells for you?

You can look at the lto_opts section (it's just a string) and see
that we seem to fail to pass through -Wall (or any warning option
I tried).  This is because

  /* Also drop all options that are handled by the driver as well,
 which includes things like -o and -v or -fhelp for example.
 We do not need those.  The only exception is -foffload option, if we
 write it in offload_lto section.  Also drop all diagnostic options.  */
  if ((cl_options[option->opt_index].flags & (CL_DRIVER|CL_WARNING))
  && (!lto_stream_offload_p || option->opt_index != OPT_foffload_))
continue;

which means you have to explicitely enable diagnostics you want at
link time at the moment.

If you want to change that you have to do some changes to lto-wrapper.c
as for example only pass through warning options that are set on all
input files (warning options are not kept per function).

Richard.

>
> Thanks
> Martin


Re: [PATCH, testsuite]: Fix g++.dg/lto/pr79671 execute failure

2017-05-03 Thread Richard Biener
On Tue, May 2, 2017 at 5:38 PM, Uros Bizjak  wrote:
> Hello!
>
> We have to match input and output operand (or use "+") of an empty asm
> to move the value to a register.
>
> 2017-05-02  Uros Bizjak  
>
> * g++.dg/lto/pr79671_0.C (foo): Fix asm constraints.
>
> Tested on alphaev68-linux-gnu (where the unpatched testcase fails) and
> x86_64-linux-gnu.
>
> OK for mailine?

Ok.

>
> Uros.


Re: Alternative check for vector refs with same alignment

2017-05-03 Thread Richard Sandiford
"Bin.Cheng"  writes:
> On Wed, May 3, 2017 at 11:07 AM, Richard Biener
>  wrote:
>> On Wed, May 3, 2017 at 9:54 AM, Richard Sandiford
>>  wrote:
>>> vect_find_same_alignment_drs uses the ddr dependence distance
>>> to tell whether two references have the same alignment.  Although
>>> that's safe with the current code, there's no particular reason
>>> why a dependence distance of 0 should mean that the accesses start
>>> on the same byte.  E.g. a reference to a full complex value could
>>> in principle depend on a reference to the imaginary component.
>>> A later patch adds support for this kind of dependence.
>>>
>>> On the other side, checking modulo vf is pessimistic when the step
>>> divided by the element size is a factor of 2.
>>>
>>> This patch instead looks for cases in which the drs have the same
>>> base, offset and step, and for which the difference in their constant
>>> initial values is a multiple of the alignment.
>>
>> I'm a bit wary about trusting operand_equal_p over dependence analysis.
>> So, did you (can you) add an assert that the new code computes
>> same alignment in all cases where the old one did?

FWIW, the operand_equal_p for the base addresses is the same as the one
used by the dependence analysis:

  /* If the references do not access the same object, we do not know
 whether they alias or not.  We do not care about TBAA or alignment
 info so we can use OEP_ADDRESS_OF to avoid false negatives.
 But the accesses have to use compatible types as otherwise the
 built indices would not match.  */
  if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), OEP_ADDRESS_OF)
  || !types_compatible_p (TREE_TYPE (DR_BASE_OBJECT (a)),
  TREE_TYPE (DR_BASE_OBJECT (b
{
  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
  return res;
}

> At the moment operand_equal_p method looks more powerful than
> dependence analysis, for example, it can handle the same memory
> reference with pointer/array_ref forms like in PR65206.  However,
> given dependence check is not expensive here, is it possible to build
> the patch on top of it when it fails?

The old check isn't valid after my later patches, because there's
no guarantee that the accesses start on the same byte.  And like
you say, the new check is more powerful in some ways (including
the modulo vf thing I mentioned).

So I'm not sure we can do anything useful with the dependence distance
information.  Sometimes it would give false positives and sometimes
it would give false negatives.

Thanks,
Richard


Re: [PATCH, GCC/ARM, Stage 1] PR71607: Fix ICE when loading constant

2017-05-03 Thread Richard Earnshaw (lists)
On 20/04/17 10:54, Prakhar Bahuguna wrote:
> [ARM] PR71607: Fix ICE when loading constant
> 
> gcc/ChangeLog:
> 
> 2017-04-18  Andre Vieira  
>   Prakhar Bahuguna  
> 
>   PR target/71607
>   * config/arm/arm.md (use_literal_pool): Removes.
>   (64-bit immediate split): No longer takes cost into consideration
>   if 'arm_disable_literal_pool' is enabled.
>   * config/arm/arm.c (arm_tls_referenced_p): Add diagnostic if TLS is
>   used when arm_disable_literal_pool is enabled.
>   (arm_max_const_double_inline_cost): Remove use of
>   arm_disable_literal_pool.
>   (arm_reorg): Add return if arm_disable_literal_pool is enabled.
>   * config/arm/vfp.md (no_literal_pool_df_immediate): New.
>   (no_literal_pool_sf_immediate): New.
> 
> testsuite/ChangeLog:
> 
> 2017-04-18  Andre Vieira  
>   Thomas Preud'homme  
>   Prakhar Bahuguna  
> 
>   PR target/71607
>   * gcc.target/arm/thumb2-slow-flash-data.c: Renamed to ...
>   * gcc.target/arm/thumb2-slow-flash-data-1.c: ... this.
>   * gcc.target/arm/thumb2-slow-flash-data-2.c: New.
>   * gcc.target/arm/thumb2-slow-flash-data-3.c: New.
>   * gcc.target/arm/thumb2-slow-flash-data-4.c: New.
>   * gcc.target/arm/thumb2-slow-flash-data-5.c: New.
>   * gcc.target/arm/tls-disable-literal-pool.c: New.
> 
> Okay for stage1?
> 

This patch lacks a description of what's going on and why the change is
necessary (it should stand alone from the PR data).  It's clearly a
non-trivial change, so why have you adopted this approach?

R.

> --
> 
> Prakhar Bahuguna
> 
> 
> 0001-ARM-PR71607-Fix-ICE-when-loading-constant.patch
> 
> 
> From 985100bbf8f168ab9a88ca29869453844eb6b58e Mon Sep 17 00:00:00 2001
> From: Prakhar Bahuguna 
> Date: Tue, 18 Apr 2017 14:16:46 +0100
> Subject: [PATCH] [ARM] PR71607: Fix ICE when loading constant
> 
> ---
>  gcc/config/arm/arm.c   | 20 +---
>  gcc/config/arm/arm.md  |  9 ++
>  gcc/config/arm/vfp.md  | 37 
> ++
>  ...low-flash-data.c => thumb2-slow-flash-data-1.c} |  0
>  .../gcc.target/arm/thumb2-slow-flash-data-2.c  | 28 
>  .../gcc.target/arm/thumb2-slow-flash-data-3.c  | 25 +++
>  .../gcc.target/arm/thumb2-slow-flash-data-4.c  | 26 +++
>  .../gcc.target/arm/thumb2-slow-flash-data-5.c  | 14 
>  .../gcc.target/arm/tls-disable-literal-pool.c  | 15 +
>  9 files changed, 163 insertions(+), 11 deletions(-)
>  rename gcc/testsuite/gcc.target/arm/{thumb2-slow-flash-data.c => 
> thumb2-slow-flash-data-1.c} (100%)
>  create mode 100644 gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-2.c
>  create mode 100644 gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-3.c
>  create mode 100644 gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-4.c
>  create mode 100644 gcc/testsuite/gcc.target/arm/thumb2-slow-flash-data-5.c
>  create mode 100644 gcc/testsuite/gcc.target/arm/tls-disable-literal-pool.c
> 
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index a2d80cfd645..01d8c52d8c5 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -8633,7 +8633,16 @@ arm_tls_referenced_p (rtx x)
>  {
>const_rtx x = *iter;
>if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
> - return true;
> + {
> +   /* ARM currently does not provide relocations to encode TLS variables
> +  into AArch32 instructions, only data, so there is no way to
> +  currently implement these if a literal pool is disabled.  */
> +   if (arm_disable_literal_pool)
> + sorry ("accessing thread-local storage is not currently supported "
> +"with -mpure-code or -mslow-flash-data");
> +
> +   return true;
> + }
>  
>/* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
>TLS offsets, not real symbol references.  */
> @@ -16391,10 +16400,6 @@ push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT 
> address, rtx *loc,
>  int
>  arm_max_const_double_inline_cost ()
>  {
> -  /* Let the value get synthesized to avoid the use of literal pools.  */
> -  if (arm_disable_literal_pool)
> -return 99;
> -
>return ((optimize_size || arm_ld_sched) ? 3 : 4);
>  }
>  
> @@ -17341,6 +17346,11 @@ arm_reorg (void)
>if (!optimize)
>  split_all_insns_noflow ();
>  
> +  /* Make sure we do not attempt to create a literal pool even though it 
> should
> + no longer be necessary to create any.  */
> +  if (arm_disable_literal_pool)
> +return ;
> +
>minipool_fix_head = minipool_fix_tail = NULL;
>  
>/* The first insn must always be a note, or the code below won't
> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
> index 21cfe3a4c31..f9365cde504 100644
> --- a/gcc/config/arm/arm.md
> +++ b/gcc/config/arm/arm.md
> @@ -233,10 +233,6 @@
>  (match_test

Re: [PATCH 2/5][GIMPLE FE] PR testsuite/80580: handle invalid types of "->" operands

2017-05-03 Thread Richard Biener
On Mon, May 1, 2017 at 8:05 PM, Mikhail Maltsev  wrote:
> This bug happens when the LHS of operator '->' is either missing, i.e.:
>
> (->a) = 0;
>
> or it is not a pointer:
>
> int b;
> b_2->c = 0;
>
> LHS should be validated.

I think for the missing LHS it's better to simply not generate code
when expr is in error
state like with

Index: gcc/c/gimple-parser.c
===
--- gcc/c/gimple-parser.c   (revision 247542)
+++ gcc/c/gimple-parser.c   (working copy)
@@ -968,6 +968,8 @@ c_parser_gimple_postfix_expression_after
break;
  }

+   if (expr.value == error_mark_node)
+ break;
start = expr.get_start ();
finish = c_parser_tokens_buf (parser, 0)->location;
expr.value = build_array_ref (op_loc, expr.value, idx);
@@ -986,6 +988,8 @@ c_parser_gimple_postfix_expression_after
  c_parser_gimple_expr_list (parser, &exprlist);
c_parser_skip_until_found (parser, CPP_CLOSE_PAREN,
   "expected %<)%>");
+   if (expr.value == error_mark_node)
+ break;
expr.value = build_call_array_loc
(expr_loc, TREE_TYPE (TREE_TYPE (expr.value)),
 expr.value, exprlist.length (), exprlist.address ());
@@ -1014,6 +1018,8 @@ c_parser_gimple_postfix_expression_after
start = expr.get_start ();
finish = c_parser_peek_token (parser)->get_finish ();
c_parser_consume_token (parser);
+   if (expr.value == error_mark_node)
+ break;
expr.value = build_component_ref (op_loc, expr.value, ident,
  comp_loc);
set_c_expr_source_range (&expr, start, finish);
@@ -1052,6 +1058,8 @@ c_parser_gimple_postfix_expression_after
start = expr.get_start ();
finish = c_parser_peek_token (parser)->get_finish ();
c_parser_consume_token (parser);
+   if (expr.value == error_mark_node)
+ break;
expr.value = build_component_ref (op_loc,
  build_simple_mem_ref_loc
(op_loc, expr.value),

it'll also get us some error recovery compared to stop parsing earlier.

So maybe you can factor out the "missing op" case from this and the
DOT case, doing
the above?

Then handle callers of build_simple_mem_ref_loc to add sanity checking
that we've
dereferencing a pointer.

Thanks,
Richard.

> --
> Regards,
>Mikhail Maltsev
>
> gcc/testsuite/ChangeLog:
>
> 2017-05-01  Mikhail Maltsev  
>
> * gcc.dg/gimplefe-error-6.c: New test.
> * gcc.dg/gimplefe-error-7.c: New test.
>
>
> gcc/c/ChangeLog:
>
> 2017-05-01  Mikhail Maltsev  
>
> * gimple-parser.c (c_parser_gimple_postfix_expression_after_primary):
> Check LHS of operator '->'
>
>


Re: [PATCH 3/5][GIMPLE FE] PR testsuite/80580. Handle invalid unary "*" operand type

2017-05-03 Thread Richard Biener
On Mon, May 1, 2017 at 8:07 PM, Mikhail Maltsev  wrote:
> This is essentially the same problem as in patch 2, but with unary '*'. We
> should verify that its argument is a pointer.

This is ok with being more specific like

 "expected pointer as argument of unary %<*%>"

Thanks,
Richard.

> --
> Regards,
>Mikhail Maltsev
>
>
> gcc/c/ChangeLog:
>
> 2017-05-01  Mikhail Maltsev  
>
> * gimple-parser.c (c_parser_gimple_unary_expression): Check argument
> type of unary '*'.
>
> gcc/testsuite/ChangeLog:
>
> 2017-05-01  Mikhail Maltsev  
>
> * gcc.dg/gimplefe-error-8.c: New test.
>
>


Re: [PATCH] canonical type hashing

2017-05-03 Thread Richard Biener
On Tue, May 2, 2017 at 3:50 PM, Nathan Sidwell  wrote:
> On the modules branch, I need rematerialize canonical types and the like
> from a read-in serialized tree.
>
> I discovered the canonical-type hash table is fed a bespoke hash value by
> each type creator.  There was no generic type hasher :( The rationale
> appears to allow each type constuctor to just specialize its needs.
> Excitingly, a generic type hasher is hiding inside
> build_type_attribute_qual_variant.  So I broke it out of there and
> generalized it a bit more.
>
> The type hashers had diverged from the attribute-variant hasher.  This is
> not an error, because the attribute variant version was creating variants
> with non-null attributes, so guaranteed different  But it was confusing.
>
> This generic hasher is slightly different to the bespoke hashers in a few
> places. One place of note was the vector type hasher, which mixed {elt-type,
> num-elts, vector-mode}, but vector-mode is entirely determined by the first
> two object, so mixing it in doesn't add any entropy.  I dropped the mode.
>
> I still kept generating the hashvalue separate to the type_hash_canon call
> itself.  Perhaps a future patch could change that, but I didn't want to much
> churn in this patch.
>
> I've included Jakub's recent TYPE_TYPELESS_STORAGE changes. (And notice that
> the attribute-type hasher wasn't dealing with it.)
>
> booted and tested on x86_64-linux-gnu, ok?

Looks ok to me.  The new function name 'type_hash_default' is a little
odd, maybe
name it type_hash_canon_hash instead?

Thanks,
Richard.

> nathan
> --
> Nathan Sidwell


Re: [PATCH] ggc-page loop

2017-05-03 Thread Nathan Sidwell

On 05/03/2017 05:54 AM, Richard Biener wrote:


He does, albeit in a coding way I dislike (watch how the iterator
update is done in the condition!)


Sorry you dislike that.  I've used that kind of loop idiom, since for ever 
though.

nathan

--
Nathan Sidwell


Re: [PATCH] canonical type hashing

2017-05-03 Thread Nathan Sidwell

On 05/03/2017 07:32 AM, Richard Biener wrote:

On Tue, May 2, 2017 at 3:50 PM, Nathan Sidwell  wrote:



Looks ok to me.  The new function name 'type_hash_default' is a little
odd, maybe
name it type_hash_canon_hash instead?


sure.  I think when I originally broke it out, I didn't know it would end up 
replacing all the uses :)


nathan
--
Nathan Sidwell


[C++ PATCH] name lookup cleanup

2017-05-03 Thread Nathan Sidwell
I'm starting to push my name lookup cleanups from the modules branch.  This 
first patch simply reorders existing decls in cp-tree.h to before the 
name-lookup #include.  It'll allow moving some more things into the 
cp_global_trees array.


The thrust of the name-lookup cleanup will remove a whole bunch of O(N^2) 
algorithms, in addition to making name-lookup.c itself less of a maze of twisty 
forwarding functions.


I'm not pushing any module-specific bits at this point.

nathan
--
Nathan Sidwell
2017-05-03  Nathan Sidwell  

* cp-tree.h (enum cp_tree_index, cp_global_trees): Move earlier,
along with #defines, to before name-lookup include.

Index: cp-tree.h
===
--- cp-tree.h   (revision 247529)
+++ cp-tree.h   (working copy)
@@ -102,6 +102,182 @@ operator == (const cp_expr &lhs, tree rh
   return lhs.get_value () == rhs;
 }
 
+
+enum cp_tree_index
+{
+CPTI_WCHAR_DECL,
+CPTI_VTABLE_ENTRY_TYPE,
+CPTI_DELTA_TYPE,
+CPTI_VTABLE_INDEX_TYPE,
+CPTI_CLEANUP_TYPE,
+CPTI_VTT_PARM_TYPE,
+
+CPTI_CLASS_TYPE,
+CPTI_UNKNOWN_TYPE,
+CPTI_INIT_LIST_TYPE,
+CPTI_VTBL_TYPE,
+CPTI_VTBL_PTR_TYPE,
+CPTI_STD,
+CPTI_ABI,
+CPTI_CONST_TYPE_INFO_TYPE,
+CPTI_TYPE_INFO_PTR_TYPE,
+CPTI_ABORT_FNDECL,
+CPTI_AGGR_TAG,
+
+CPTI_CTOR_IDENTIFIER,
+CPTI_COMPLETE_CTOR_IDENTIFIER,
+CPTI_BASE_CTOR_IDENTIFIER,
+CPTI_DTOR_IDENTIFIER,
+CPTI_COMPLETE_DTOR_IDENTIFIER,
+CPTI_BASE_DTOR_IDENTIFIER,
+CPTI_DELETING_DTOR_IDENTIFIER,
+CPTI_DELTA_IDENTIFIER,
+CPTI_IN_CHARGE_IDENTIFIER,
+CPTI_VTT_PARM_IDENTIFIER,
+CPTI_NELTS_IDENTIFIER,
+CPTI_THIS_IDENTIFIER,
+CPTI_PFN_IDENTIFIER,
+CPTI_VPTR_IDENTIFIER,
+CPTI_STD_IDENTIFIER,
+CPTI_AUTO_IDENTIFIER,
+CPTI_DECLTYPE_AUTO_IDENTIFIER,
+
+CPTI_LANG_NAME_C,
+CPTI_LANG_NAME_CPLUSPLUS,
+
+CPTI_EMPTY_EXCEPT_SPEC,
+CPTI_NOEXCEPT_TRUE_SPEC,
+CPTI_NOEXCEPT_FALSE_SPEC,
+CPTI_TERMINATE,
+CPTI_CALL_UNEXPECTED,
+CPTI_ATEXIT_FN_PTR_TYPE,
+CPTI_ATEXIT,
+CPTI_DSO_HANDLE,
+CPTI_DCAST,
+
+CPTI_KEYED_CLASSES,
+
+CPTI_NULLPTR,
+CPTI_NULLPTR_TYPE,
+
+CPTI_ALIGN_TYPE,
+
+CPTI_ANY_TARG,
+
+CPTI_MAX
+};
+
+extern GTY(()) tree cp_global_trees[CPTI_MAX];
+
+#define wchar_decl_nodecp_global_trees[CPTI_WCHAR_DECL]
+#define vtable_entry_type  cp_global_trees[CPTI_VTABLE_ENTRY_TYPE]
+/* The type used to represent an offset by which to adjust the `this'
+   pointer in pointer-to-member types.  */
+#define delta_type_nodecp_global_trees[CPTI_DELTA_TYPE]
+/* The type used to represent an index into the vtable.  */
+#define vtable_index_type  cp_global_trees[CPTI_VTABLE_INDEX_TYPE]
+
+#define class_type_nodecp_global_trees[CPTI_CLASS_TYPE]
+#define unknown_type_node  cp_global_trees[CPTI_UNKNOWN_TYPE]
+#define init_list_type_nodecp_global_trees[CPTI_INIT_LIST_TYPE]
+#define vtbl_type_node cp_global_trees[CPTI_VTBL_TYPE]
+#define vtbl_ptr_type_node cp_global_trees[CPTI_VTBL_PTR_TYPE]
+#define std_node   cp_global_trees[CPTI_STD]
+#define abi_node   cp_global_trees[CPTI_ABI]
+#define const_type_info_type_node  
cp_global_trees[CPTI_CONST_TYPE_INFO_TYPE]
+#define type_info_ptr_type cp_global_trees[CPTI_TYPE_INFO_PTR_TYPE]
+#define abort_fndecl   cp_global_trees[CPTI_ABORT_FNDECL]
+#define current_aggr   cp_global_trees[CPTI_AGGR_TAG]
+#define nullptr_node   cp_global_trees[CPTI_NULLPTR]
+#define nullptr_type_node  cp_global_trees[CPTI_NULLPTR_TYPE]
+/* std::align_val_t */
+#define align_type_nodecp_global_trees[CPTI_ALIGN_TYPE]
+
+/* We cache these tree nodes so as to call get_identifier less
+   frequently.  */
+
+/* The name of a constructor that takes an in-charge parameter to
+   decide whether or not to construct virtual base classes.  */
+#define ctor_identifier
cp_global_trees[CPTI_CTOR_IDENTIFIER]
+/* The name of a constructor that constructs virtual base classes.  */
+#define complete_ctor_identifier   
cp_global_trees[CPTI_COMPLETE_CTOR_IDENTIFIER]
+/* The name of a constructor that does not construct virtual base classes.  */
+#define base_ctor_identifier   
cp_global_trees[CPTI_BASE_CTOR_IDENTIFIER]
+/* The name of a destructor that takes an in-charge parameter to
+   decide whether or not to destroy virtual base classes and whether
+   or not to delete the object.  */
+#define dtor_identifier
cp_global_trees[CPTI_DTOR_IDENTIFIER]
+/* The name of a destructor that destroys virtual base classes.  */
+#define complete_dtor_identifier   
cp_global_trees[CPTI_COMPLETE_DTOR_IDENTIFIER]
+/* The name of a destructor that 

[committed] New fix-it printer

2017-05-03 Thread David Malcolm
The existing fix-it printer can lead to difficult-to-read output
when fix-it hints are near each other.  For example, in a recent
patch to add fix-it hints to the C++ frontend's -Wold-style-cast,
e.g. for:

  foo *f = (foo *)ptr->field;
   ^

the fix-it hints:
 replace the open paren with "const_cast<"
 replace the close paren with "> ("
 insert ")" after the "ptr->field"

would be printed in this odd-looking way:

  foo *f = (foo *)ptr->field;
   ^
   -
   const_cast<
 -
 > ()

class rich_location consolidates adjacent fix-it hints, which helps
somewhat, but the underlying problem is that the existing printer
simply walks through the list of hints printing them, starting newlines
as necessary.

This patch reimplements fix-it printing by introducing a planning
stage: a new class line_corrections "plans" how to print the
fix-it hints affecting a line, generating a vec of "correction"
instances.  Hints that are sufficiently close to each other are
consolidated at this stage.

This leads to the much more reasonable output for the above case:

  foo *f = (foo *)ptr->field;
   ^
   -
   const_cast (ptr->field);

where the 3 hints are consolidated into one "correction" at printing.

Successfully bootstrapped®rtested on x86_64-pc-linux-gnu.

Committed to trunk as r247548.

gcc/ChangeLog:
* diagnostic-show-locus.c (struct column_range): New struct.
(get_affected_columns): New function.
(get_printed_columns): New function.
(struct correction): New struct.
(correction::ensure_capacity): New function.
(correction::ensure_terminated): New function.
(struct line_corrections): New struct.
(line_corrections::~line_corrections): New dtor.
(line_corrections::add_hint): New function.
(layout::print_trailing_fixits): Reimplement in terms of the new
classes.
(selftest::test_overlapped_fixit_printing): New function.
(selftest::diagnostic_show_locus_c_tests): Call it.
---
 gcc/diagnostic-show-locus.c | 572 +---
 1 file changed, 534 insertions(+), 38 deletions(-)

diff --git a/gcc/diagnostic-show-locus.c b/gcc/diagnostic-show-locus.c
index b91c9d5..f410a32 100644
--- a/gcc/diagnostic-show-locus.c
+++ b/gcc/diagnostic-show-locus.c
@@ -1242,6 +1242,295 @@ layout::annotation_line_showed_range_p (int line, int 
start_column,
   return false;
 }
 
+/* Classes for printing trailing fix-it hints i.e. those that
+   don't add new lines.
+
+   For insertion, these can look like:
+
+ new_text
+
+   For replacement, these can look like:
+
+ - : underline showing affected range
+ new_text
+
+   For deletion, these can look like:
+
+ - : underline showing affected range
+
+   This can become confusing if they overlap, and so we need
+   to do some preprocessing to decide what to print.
+   We use the list of fixit_hint instances affecting the line
+   to build a list of "correction" instances, and print the
+   latter.
+
+   For example, consider a set of fix-its for converting
+   a C-style cast to a C++ const_cast.
+
+   Given:
+
+   ..0112233.
+   ..123456789012345678901234567890123456789.
+ foo *f = (foo *)ptr->field;
+  ^
+
+   and the fix-it hints:
+ - replace col 10 (the open paren) with "const_cast<"
+ - replace col 16 (the close paren) with "> ("
+ - insert ")" before col 27
+
+   then we would get odd-looking output:
+
+ foo *f = (foo *)ptr->field;
+  ^
+  -
+  const_cast<
+-
+> ()
+
+   It would be better to detect when fixit hints are going to
+   overlap (those that require new lines), and to consolidate
+   the printing of such fixits, giving something like:
+
+ foo *f = (foo *)ptr->field;
+  ^
+  -
+  const_cast (ptr->field)
+
+   This works by detecting when the printing would overlap, and
+   effectively injecting no-op replace hints into the gaps between
+   such fix-its, so that the printing joins up.
+
+   In the above example, the overlap of:
+ - replace col 10 (the open paren) with "const_cast<"
+   and:
+ - replace col 16 (the close paren) with "> ("
+   is fixed by injecting a no-op:
+ - replace cols 11-15 with themselves ("foo *")
+   and consolidating these, making:
+ - replace cols 10-16 with "const_cast<" + "foo *" + "> ("
+   i.e.:
+ - replace cols 10-16 with "const_cast ("
+
+   This overlaps with the final fix-it hint:
+ - insert ")" before col 27
+   and so we repeat the consolidation process, by injecting
+   a no-op:
+ - replace cols 17-26 with themselves ("ptr->field")
+   giving:
+ - replac

Re: [PATCH] Implement a warning for bogus sizeof(pointer) / sizeof(pointer[0])

2017-05-03 Thread Jason Merrill
On Tue, May 2, 2017 at 9:26 AM, Bernd Edlinger
 wrote:
> On 05/01/17 17:54, Jason Merrill wrote:
>> On Fri, Apr 28, 2017 at 1:05 PM, Bernd Edlinger
>>  wrote:
>>> On 04/28/17 17:29, Martin Sebor wrote:
 On 04/28/2017 08:12 AM, Bernd Edlinger wrote:
>
> Do you want me to change the %qT format strings to %T ?

 Yes, with the surrounding %< and %> the nested directives should
 use the unquoted forms, otherwise the printer would end up quoting
 both the whole expression and the type operand.

 FWIW, to help avoid this mistake, I think this might be something
 for GCC -Wformat to warn on and the pretty-printer to detect (and
 ICE on).

>>>
>>> Ah, now I understand.  That's pretty advanced.
>>>
>>> Here is the modified patch with correct quoting of the expression.
>>>
>>> Bootstrap and reg-testing on x86_64-pc-linux-gnu.
>>
>>> * cp-gimplify.c (cp_fold): Implement the -Wsizeof_pointer_div warning.
>>
>> I think this warning belongs in cp_build_binary_op rather than cp_fold.
>>
>
> Done, as suggested.

The pattern in that function is to treat all *_DIV_EXPR the same; I
don't think we need to break that pattern with this patch.  So please
move the new code after the other DIV case labels.  With that the C++
changes are OK.

Jason


Re: [PATCH] Fix documentation and a ctor in gcov.c

2017-05-03 Thread Nathan Sidwell

On 05/02/2017 11:37 AM, Martin Liška wrote:

On 04/28/2017 07:23 PM, Nathan Sidwell wrote:

Write proper member initializers please.


Hi.

Done that, patch can bootstrap on ppc64le-redhat-linux and survives regression 
tests.
I consider the patch as pre-approved.


yes, thanks!


--
Nathan Sidwell


[PATCH v2] C++: Add fix-it hints for -Wold-style-cast

2017-05-03 Thread David Malcolm
On Thu, 2017-04-27 at 23:03 +0200, Marek Polacek wrote:
> On Thu, Apr 27, 2017 at 05:10:24PM -0400, David Malcolm wrote:
> > +  /* First try const_cast.  */
> > +  trial = build_const_cast (dst_type, orig_expr, 0 /* complain
> > */);
> > +  if (trial != error_mark_node)
> > +return "const_cast";
> > +
> > +  /* If that fails, try static_cast.  */
> > +  trial = build_static_cast (dst_type, orig_expr, 0 /* complain
> > */);
> > +  if (trial != error_mark_node)
> > +return "static_cast";
> > +
> > +  /* Finally, try reinterpret_cast.  */
> > +  trial = build_reinterpret_cast (dst_type, orig_expr, 0 /*
> > complain */);
> > +  if (trial != error_mark_node)
> > +return "reinterpret_cast";
> 
> I think you'll want tf_none instead of 0 /* complain */ in these.
> 
>   Marek

Thanks.

Here's an updated version of the patch.

Changes since v1:
- updated expected fixit-formatting (the new fix-it printer in
  r247548 handles this properly now)
- added new test cases as suggested by Florian
- use "tf_none" rather than "0 /* complain */"

Successfully bootstrapped®rtested on x86_64-pc-linux-gnu.

OK for trunk?

gcc/cp/ChangeLog:
* parser.c (get_cast_suggestion): New function.
(maybe_add_cast_fixit): New function.
(cp_parser_cast_expression): Capture the location of the closing
parenthesis.  Call maybe_add_cast_fixit when emitting warnings
about old-style casts.

gcc/testsuite/ChangeLog:
* g++.dg/other/old-style-cast-fixits.C: New test case.
---
 gcc/cp/parser.c| 93 -
 gcc/testsuite/g++.dg/other/old-style-cast-fixits.C | 95 ++
 2 files changed, 186 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/other/old-style-cast-fixits.C

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 4714bc6..2f83aa9 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -8633,6 +8633,85 @@ cp_parser_tokens_start_cast_expression (cp_parser 
*parser)
 }
 }
 
+/* Try to find a legal C++-style cast to DST_TYPE for ORIG_EXPR, trying them
+   in the order: const_cast, static_cast, reinterpret_cast.
+
+   Don't suggest dynamic_cast.
+
+   Return the first legal cast kind found, or NULL otherwise.  */
+
+static const char *
+get_cast_suggestion (tree dst_type, tree orig_expr)
+{
+  tree trial;
+
+  /* Reuse the parser logic by attempting to build the various kinds of
+ cast, with "complain" disabled.
+ Identify the first such cast that is valid.  */
+
+  /* Don't attempt to run such logic within template processing.  */
+  if (processing_template_decl)
+return NULL;
+
+  /* First try const_cast.  */
+  trial = build_const_cast (dst_type, orig_expr, tf_none);
+  if (trial != error_mark_node)
+return "const_cast";
+
+  /* If that fails, try static_cast.  */
+  trial = build_static_cast (dst_type, orig_expr, tf_none);
+  if (trial != error_mark_node)
+return "static_cast";
+
+  /* Finally, try reinterpret_cast.  */
+  trial = build_reinterpret_cast (dst_type, orig_expr, tf_none);
+  if (trial != error_mark_node)
+return "reinterpret_cast";
+
+  /* No such cast possible.  */
+  return NULL;
+}
+
+/* If -Wold-style-cast is enabled, add fix-its to RICHLOC,
+   suggesting how to convert a C-style cast of the form:
+
+ (DST_TYPE)ORIG_EXPR
+
+   to a C++-style cast.
+
+   The primary range of RICHLOC is asssumed to be that of the original
+   expression.  OPEN_PAREN_LOC and CLOSE_PAREN_LOC give the locations
+   of the parens in the C-style cast.  */
+
+static void
+maybe_add_cast_fixit (rich_location *rich_loc, location_t open_paren_loc,
+ location_t close_paren_loc, tree orig_expr,
+ tree dst_type)
+{
+  /* This function is non-trivial, so bail out now if the warning isn't
+ going to be emitted.  */
+  if (!warn_old_style_cast)
+return;
+
+  /* Try to find a legal C++ cast, trying them in order:
+ const_cast, static_cast, reinterpret_cast.  */
+  const char *cast_suggestion = get_cast_suggestion (dst_type, orig_expr);
+  if (!cast_suggestion)
+return;
+
+  /* Replace the open paren with "CAST_SUGGESTION<".  */
+  pretty_printer pp;
+  pp_printf (&pp, "%s<", cast_suggestion);
+  rich_loc->add_fixit_replace (open_paren_loc, pp_formatted_text (&pp));
+
+  /* Replace the close paren with "> (".  */
+  rich_loc->add_fixit_replace (close_paren_loc, "> (");
+
+  /* Add a closing paren after the expr (the primary range of RICH_LOC).  */
+  rich_loc->add_fixit_insert_after (")");
+}
+
+
 /* Parse a cast-expression.
 
cast-expression:
@@ -8668,6 +8747,7 @@ cp_parser_cast_expression (cp_parser *parser, bool 
address_p, bool cast_p,
   /* Consume the `('.  */
   cp_token *open_paren = cp_lexer_consume_token (parser->lexer);
   location_t open_paren_loc = open_paren->location;
+  location_t close_paren_loc = UNKNOWN_LOCATION;
 
   /* A very tricky bit is that `(struct S) { 3 }' is a
 compound

Re: [PATCH, v3] Fix PR51513, switch statement with default case containing __builtin_unreachable leads to wild branch

2017-05-03 Thread Richard Biener
On Wed, 26 Apr 2017, Peter Bergner wrote:

> On 4/20/17 8:26 AM, Peter Bergner wrote:
> > On 4/20/17 2:37 AM, Richard Biener wrote:
> >> Ok, so I think we should ensure that we remove the regular cases
> >> with unreachable destination, like in
> >>
> >> switch (i)
> >> {
> >> case 0:
> >>   __builtin_unreachable ();
> >> default:;
> >> }
> >>
> >> and handle default with __builtin_unreachable () from switch
> >> expansion by omitting the range check for the jump table
> >> indexing (and choose a non-default case label for gaps).
> > 
> > Ok, I'll modify optimize_unreachable() to remove the unreachable
> > case statements, and leave the unreachable default case statement
> > for switch expansion so we can eliminate the range check.  Thanks.
> 
> Ok, I lied. :-)  It ended up being a fair amount of code to remove
> the unreachable case statements within optimize_unreachable().
> Instead, I hooked into group_case_labels_stmt() which is much
> simpler!  That code eliminates all case statements that have the
> same destination as the default case statement.  With the patch,
> we also eliminate case statements that are unreachable, thus
> treating them like default cases.  This has the added benefit of
> being able to reduce the size of the dispatch table, if those cases
> were at the end of the dispatch table entries.
> 
> One difference from the last patch is that I am no longer setting
> default_label to NULL when we emit a decision tree.  I noticed that
> the decision tree code seemed to generate slightly better code for
> some of my unit tests if I left it alone.  This simplified the
> patch somewhat by removing the changes to emit_case_nodes().
> 
> This passed bootstrap and regtesting on powerpc64le-linux and
> x86_64-linux.  Is this ok for trunk now?  If so, I can hold off
> committing it until GCC 7 has been released if that helps.

Can you do the gimple_unreachable_bb_p check earlier in
expand_case so it covers the emit_case_decision_tree path as well
(and verify that works, of course)?  So basically right at

  /* Find the default case target label.  */
  default_label = jump_target_rtx
  (CASE_LABEL (gimple_switch_default_label (stmt)));
  edge default_edge = EDGE_SUCC (bb, 0);
  int default_prob = default_edge->probability;

handle this case.

As for Bernhards concern I share this -- please intead make the
interface take either a gimple_seq or a gimple_stmt_iterator
instead of a basic-block.  That makes it more obvious you
can't use things like gsi_after_labels.  Also I think it's more
natural to work backwards as the last stmt in the sequence
_has_ to be __builtin_unreachable () and thus checking that first
is the cheapest thing to do given that in most cases it will
not be __builtin_unreachable () (but a noreturn call or an
inifinite loop).

Thus, name it gimple_seq_unreachable_p.

Thanks,
Richard.

> Peter
> 
> 
> gcc/
>   * tree-cfg.c (gimple_unreachable_bb_p): New function.
>   (assert_unreachable_fallthru_edge_p): Use it.
>   (group_case_labels_stmt): Likewise.
>   * tree-cfg.h: Prototype it.
>   * stmt.c: Include cfghooks.h and tree-cfg.h.
>   (emit_case_dispatch_table) : New local variable.
>   Use it to fill dispatch table gaps.
>   Test for default_label before updating probabilities.
>   (expand_case) : Remove unneeded initialization.
>   Test for unreachable default case statement and remove its edge.
>   Set default_label accordingly.
>   * gcc/tree-ssa-ccp.c (optimize_unreachable): Update comment.
> 
> gcc/testsuite/
>   * gcc.target/powerpc/pr51513.c: New test.
>   * gcc.dg/predict-13.c: Replace __builtin_unreachable() with
>   __builtin_abort().
>   * gcc.dg/predict-14.c: Likewise.
> 
> Index: gcc/tree-cfg.c
> ===
> --- gcc/tree-cfg.c(revision 247291)
> +++ gcc/tree-cfg.c(working copy)
> @@ -452,6 +452,37 @@ computed_goto_p (gimple *t)
> && TREE_CODE (gimple_goto_dest (t)) != LABEL_DECL);
>  }
>  
> +/* Returns true if the basic block BB has no successors and only contains
> +   a call to __builtin_unreachable ().  */
> +
> +bool
> +gimple_unreachable_bb_p (basic_block bb)
> +{
> +  gimple_stmt_iterator gsi;
> +  gimple_seq stmts = bb_seq (bb);
> +  gimple *stmt;
> +
> +  if (EDGE_COUNT (bb->succs) != 0)
> +return false;
> +
> +  gsi = gsi_start (stmts);
> +  while (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL)
> +gsi_next (&gsi);
> +
> +  if (gsi_end_p (gsi))
> +return false;
> +
> +  stmt = gsi_stmt (gsi);
> +  while (is_gimple_debug (stmt) || gimple_clobber_p (stmt))
> +{
> +  gsi_next (&gsi);
> +  if (gsi_end_p (gsi))
> + return false;
> +  stmt = gsi_stmt (gsi);
> +}
> +  return gimple_call_builtin_p (stmt, BUILT_IN_UNREACHABLE);
> +}
> +
>  /* Returns true for edge E where e->src ends with a GIMPLE_COND and
> the other edge points to a bb with just __builtin_unreachable ().

Re: [PATCH GCC8][17/33]Treat complex cand step as invriant expression

2017-05-03 Thread Richard Biener
On Tue, Apr 18, 2017 at 12:46 PM, Bin Cheng  wrote:
> Hi,
> We generally need to compute cand step in loop preheader and use it in loop 
> body.
> Unless it's an SSA_NAME of constant integer, an invariant expression is 
> needed.

I'm confused as to what this patch does.  Comments talk about "Handle step as"
but then we print "Depend on inv...".  And we share bitmaps, well it seems

+ find_inv_vars (data, &step, &cand->inv_vars);
+
+ iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
+ /* Share bitmap between inv_vars and inv_exprs for cand.  */
+ if (inv_expr != NULL)
+   {
+ cand->inv_exprs = cand->inv_vars;
+ cand->inv_vars = NULL;
+ if (cand->inv_exprs)
+   bitmap_clear (cand->inv_exprs);
+ else
+   cand->inv_exprs = BITMAP_ALLOC (NULL);
+
+ bitmap_set_bit (cand->inv_exprs, inv_expr->id);

just shares the bitmap allocation (and leaks cand->inv_exprs?).

Note that generally it might be cheaper to use bitmap_head instead of
bitmap in the various structures (and then bitmap_initialize ()), this
saves one indirection.

Anyway, the relation between inv_vars and inv_exprs is what confuses me.
Maybe it's the same as for cost_pair?  invariants vs. loop invariants?
whatever that means...

That is, can you expand the comments in cost_pair / iv_cand for inv_vars
vs. inv_exprs, esp what "invariant" actually means?

Thanks,
Richard.

> Thanks,
> bin
>
> 2017-04-11  Bin Cheng  
>
> * tree-ssa-loop-ivopts.c (struct iv_cand): New field inv_exprs.
> (dump_cand): Support iv_cand.inv_exprs.
> (add_candidate_1): Record invariant exprs in iv_cand.inv_exprs
> for candidates.
> (iv_ca_set_no_cp, iv_ca_set_cp, free_loop_data): Support
> iv_cand.inv_exprs.


Re: Update ipa-cp to new time metrics

2017-05-03 Thread Martin Jambor
Hi,

On Tue, May 02, 2017 at 11:33:28AM +0200, Jan Hubicka wrote:
> Hi,
> this patch makes ipa-cp to use nonspecialized time as a base for decision 
> about
> cloning.  I wonder about the capping - we perhaps want to use sreals further 
> in
> the code because time differences can be large (with profile feedback). But I
> guess this can be done incrementally - main point of the patch is to update
> interfaces from ipa-analysis.
> 
> Bootstrapped/regtested x86_64-linux, OK?

Yes it is, thanks.

Martin

> 
> Honza
> 
>   * ipa-cp.c (perform_estimation_of_a_value): Drop base_time parameter;
>   update use of estimate_ipcp_clone_size_and_time.
>   (estimate_local_effects): Update use of
>   estimate_ipcp_clone_size_and_time and perform_estimation_of_a_value.
>   * ipa-inline.h (estimate_ipcp_clone_size_and_time): Update prototype.
>   * ipa-inline-analysis.c (estimate_ipcp_clone_size_and_time):
>   Return nonspecialized time.


Re: [PATCH] have -Wformat-overflow handle -fexec-charset (PR 80503)

2017-05-03 Thread Christophe Lyon
Hi,


On 29 April 2017 at 19:56, Andreas Schwab  wrote:
> On Apr 28 2017, Martin Sebor  wrote:
>
>> +void test_width_and_precision_out_of_range (char *d)
>> +{
>> +#if __LONG_MAX__ == 2147483647
>> +#  define   MAX_P1_STR "2147483648"
>> +#elif __LONG_MAX__ == 9223372036854775807
>> +#  define MAX_P1_STR "9223372036854775808"
>> +#endif
>> +
>> +  T ("%" MAX_P1_STR "i", 0);/* { dg-warning "width out of range" } */
>> +  /* { dg-warning "result to exceed .INT_MAX. " "" { target *-*-* } .-1 } */
>> +  T ("%." MAX_P1_STR "i", 0);   /* { dg-warning "precision out of range" } 
>> */
>
> FAIL: gcc.dg/tree-ssa/builtin-sprintf-warn-18.c  (test for warnings, line 123)
> FAIL: gcc.dg/tree-ssa/builtin-sprintf-warn-18.c  (test for warnings, line 125)
> FAIL: gcc.dg/tree-ssa/builtin-sprintf-warn-18.c (test for excess errors)
> Excess errors:
> /daten/aranym/gcc/gcc-20170429/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-18.c:125:3:
>  warning: '%.2147483648i' directive output of 2147483648 bytes causes result 
> to exceed 'INT_MAX' [-Wformat-overflow=]
>
> Andreas.

I've noticed the same errors on arm* targets, if it's easier to reproduce.

Thanks,

Christophe

>
> --
> Andreas Schwab, sch...@linux-m68k.org
> GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
> "And now for something completely different."


Re: [PATCH GCC8][01/33]Handle TRUNCATE between tieable modes in rtx_cost

2017-05-03 Thread Christophe Lyon
Hi Bin,

On 3 May 2017 at 12:12, Bin.Cheng  wrote:
> On Wed, May 3, 2017 at 11:09 AM, Kyrill Tkachov
>  wrote:
>> Hi Bin,
>>
>>
>> On 03/05/17 11:02, Bin.Cheng wrote:
>>>
>>> On Wed, May 3, 2017 at 9:38 AM, Bin.Cheng  wrote:

 On Wed, May 3, 2017 at 7:17 AM, Eric Botcazou 
 wrote:
>>
>> 2017-04-11  Bin Cheng  
>>
>>* rtlanal.c (rtx_cost): Handle TRUNCATE between tieable modes.
>
> This breaks bootstrap with RTL checking:
>
> /home/eric/build/gcc/native/./gcc/xgcc
> -B/home/eric/build/gcc/native/./gcc/ -
> nostdinc -x c /dev/null -S -o /dev/null -fself-
> test=/home/eric/svn/gcc/gcc/testsuite/selftests
> cc1: internal compiler error: RTL check: expected code 'subreg', have
> 'truncate' in rtx_cost, at rtlanal.c:4169
> 0xbae338 rtl_check_failed_code1(rtx_def const*, rtx_code, char const*,
> int,
> char const*)
>  /home/eric/svn/gcc/gcc/rtl.c:829
> 0xbbc9b4 rtx_cost(rtx_def*, machine_mode, rtx_code, int, bool)
>  /home/eric/svn/gcc/gcc/rtlanal.c:4169
> 0x8517e6 set_src_cost
>  /home/eric/svn/gcc/gcc/rtl.h:2685
> 0x8517e6 init_expmed_one_conv
>  /home/eric/svn/gcc/gcc/expmed.c:142
> 0x8517e6 init_expmed_one_mode
>  /home/eric/svn/gcc/gcc/expmed.c:209
> 0x853fb2 init_expmed()
>  /home/eric/svn/gcc/gcc/expmed.c:270
> 0xc45974 backend_init_target
>  /home/eric/svn/gcc/gcc/toplev.c:1665
> 0xc45974 initialize_rtl()
>
 Sorry for disturbing, I will revert this if can't fix today.
>>>
>>> It looks bogus and I couldn't find the motivating case for it, so
>>> revert with attached patch.  Build on x86 and commit as obvious.
>>>
>>> Thanks,
>>> bin
>>> 2017-05-03  Bin Cheng  
>>>
>>>  Revert
>>>  2017-05-02  Bin Cheng  
>>>  * rtlanal.c (rtx_cost): Handle TRUNCATE between tieable modes.
>>
>>
>> Looking at the code in the patch...
>>
>> +case TRUNCATE:
>> +  /* If we can tie these modes, make this cheap.  */
>> +  if (MODES_TIEABLE_P (mode, GET_MODE (SUBREG_REG (x
>>
>> 'code' here is GET_CODE (x) and in this case it is TRUNCATE.
>> SUBREG_REG asserts (in RTL checking mode) that its argument is a SUBREG, so
>> passing it a TRUNCATE rtx would cause
>> the checking failure Eric reported. I think you meant to use XEXP (x, 0)
>> instead of SUBREG_REG (x) ?
> Yes, I guess so.  Reverted since I couldn't find the original test.
>

If it helps, your patch also introduced regressions on some arm targets.
See: 
http://people.linaro.org/~christophe.lyon/cross-validation/gcc/trunk/247509/report-build-info.html

gcc.c-torture/execute/pr53645-2.c   -O2  (test for excess errors)
gcc.c-torture/execute/pr53645-2.c   -O2 -flto
-fno-use-linker-plugin -flto-partition=none  (test for excess errors)
gcc.c-torture/execute/pr53645-2.c   -O2 -flto -fuse-linker-plugin
-fno-fat-lto-objects  (test for excess errors)
gcc.c-torture/execute/pr53645-2.c   -O3 -fomit-frame-pointer
-funroll-loops -fpeel-loops -ftracer -finline-functions  (test for
excess errors)
gcc.c-torture/execute/pr53645-2.c   -O3 -g  (test for excess errors)

Thanks,

Christophe

> Thanks,
> bin
>>
>> Thanks,
>> Kyrill
>>


Re: [PATCH GCC8][03/33]Refactor invariant variable/expression handling

2017-05-03 Thread Christophe Lyon
Hi Bin,


On 24 April 2017 at 12:26, Richard Biener  wrote:
> On Tue, Apr 18, 2017 at 12:38 PM, Bin Cheng  wrote:
>> Hi,
>> This patch refactors how invariant variable/expressions are handled.  Now 
>> they are
>> recorded in the same kind data structure and handled similarly, which makes 
>> code
>> easier to understand.
>>
>> Is it OK?
>

This patch (r247512) caused regression on some arm
targets/cpu/fpu/runtestflags combinations:
FAIL:gcc.target/arm/ivopts.c object-size text <= 32

See: 
http://people.linaro.org/~christophe.lyon/cross-validation/gcc/trunk/247512/report-build-info.html

Thanks,

Christophe


> Ok.
>
> Richard.
>
>> Thanks,
>> bin
>>
>> 2017-04-11  Bin Cheng  
>>
>> * tree-ssa-loop-ivopts.c (struct cost_pair): Rename depends_on to
>> inv_vars.  Add inv_exprs.
>> (struct iv_cand): Rename depends_on to inv_vars.
>> (struct ivopts_data): Rename max_inv_id/n_invariant_uses to
>> max_inv_var_id/n_inv_var_uses.  Move max_inv_expr_id around.
>> Refactor field used_inv_exprs from has_map to array n_inv_expr_uses.
>> (dump_cand): Dump inv_vars.
>> (tree_ssa_iv_optimize_init): Support inv_vars and inv_exprs.
>> (record_invariant, find_depends, add_candidate_1): Ditto.
>> (set_group_iv_cost, force_var_cost): Ditto.
>> (split_address_cost, ptr_difference_cost, difference_cost): Ditto.
>> (get_computation_cost_at, get_computation_cost): Ditto.
>> (determine_group_iv_cost_generic): Ditto.
>> (determine_group_iv_cost_address): Ditto.
>> (determine_group_iv_cost_cond, autoinc_possible_for_pair): Ditto.
>> (determine_group_iv_costs): Ditto.
>> (iv_ca_recount_cost): Update call to ivopts_global_cost_for_size.
>> (iv_ca_set_remove_invariants): Renamed to ...
>> (iv_ca_set_remove_invs): ... this.  Support inv_vars and inv_exprs.
>> (iv_ca_set_no_cp): Use iv_ca_set_remove_invs.
>> (iv_ca_set_add_invariants):  Renamed to ...
>> (iv_ca_set_add_invs): ... this.  Support inv_vars and inv_exprs.
>> (iv_ca_set_cp): Use iv_ca_set_add_invs.
>> (iv_ca_has_deps): Support inv_vars and inv_exprs.
>> (iv_ca_new, iv_ca_free, iv_ca_dump, free_loop_data): Ditto.
>> (create_new_ivs): Remove useless dump.
>>
>> gcc/testsuite/ChangeLog
>> 2017-04-11  Bin Cheng  
>>
>> * g++.dg/tree-ssa/ivopts-3.C: Adjust test string.


Re: [PATCH GCC8][03/33]Refactor invariant variable/expression handling

2017-05-03 Thread Bin.Cheng
On Wed, May 3, 2017 at 3:38 PM, Christophe Lyon
 wrote:
> Hi Bin,
>
>
> On 24 April 2017 at 12:26, Richard Biener  wrote:
>> On Tue, Apr 18, 2017 at 12:38 PM, Bin Cheng  wrote:
>>> Hi,
>>> This patch refactors how invariant variable/expressions are handled.  Now 
>>> they are
>>> recorded in the same kind data structure and handled similarly, which makes 
>>> code
>>> easier to understand.
>>>
>>> Is it OK?
>>
>
> This patch (r247512) caused regression on some arm
> targets/cpu/fpu/runtestflags combinations:
> FAIL:gcc.target/arm/ivopts.c object-size text <= 32
>
> See: 
> http://people.linaro.org/~christophe.lyon/cross-validation/gcc/trunk/247512/report-build-info.html
Hi Christophe,
Thanks very much for reporting.  Seems it's more than pure
refactoring.  I will investigate it.

Thanks,
bin
>
> Thanks,
>
> Christophe
>
>
>> Ok.
>>
>> Richard.
>>
>>> Thanks,
>>> bin
>>>
>>> 2017-04-11  Bin Cheng  
>>>
>>> * tree-ssa-loop-ivopts.c (struct cost_pair): Rename depends_on to
>>> inv_vars.  Add inv_exprs.
>>> (struct iv_cand): Rename depends_on to inv_vars.
>>> (struct ivopts_data): Rename max_inv_id/n_invariant_uses to
>>> max_inv_var_id/n_inv_var_uses.  Move max_inv_expr_id around.
>>> Refactor field used_inv_exprs from has_map to array n_inv_expr_uses.
>>> (dump_cand): Dump inv_vars.
>>> (tree_ssa_iv_optimize_init): Support inv_vars and inv_exprs.
>>> (record_invariant, find_depends, add_candidate_1): Ditto.
>>> (set_group_iv_cost, force_var_cost): Ditto.
>>> (split_address_cost, ptr_difference_cost, difference_cost): Ditto.
>>> (get_computation_cost_at, get_computation_cost): Ditto.
>>> (determine_group_iv_cost_generic): Ditto.
>>> (determine_group_iv_cost_address): Ditto.
>>> (determine_group_iv_cost_cond, autoinc_possible_for_pair): Ditto.
>>> (determine_group_iv_costs): Ditto.
>>> (iv_ca_recount_cost): Update call to ivopts_global_cost_for_size.
>>> (iv_ca_set_remove_invariants): Renamed to ...
>>> (iv_ca_set_remove_invs): ... this.  Support inv_vars and inv_exprs.
>>> (iv_ca_set_no_cp): Use iv_ca_set_remove_invs.
>>> (iv_ca_set_add_invariants):  Renamed to ...
>>> (iv_ca_set_add_invs): ... this.  Support inv_vars and inv_exprs.
>>> (iv_ca_set_cp): Use iv_ca_set_add_invs.
>>> (iv_ca_has_deps): Support inv_vars and inv_exprs.
>>> (iv_ca_new, iv_ca_free, iv_ca_dump, free_loop_data): Ditto.
>>> (create_new_ivs): Remove useless dump.
>>>
>>> gcc/testsuite/ChangeLog
>>> 2017-04-11  Bin Cheng  
>>>
>>> * g++.dg/tree-ssa/ivopts-3.C: Adjust test string.


Re: [PATCH] handle sprintf(d, "%s", ...) in gimple-ssa-sprintf.c

2017-05-03 Thread Martin Sebor

On 05/03/2017 04:20 AM, Richard Biener wrote:

On Tue, May 2, 2017 at 4:41 PM, Martin Sebor  wrote:

FWIW, my fix for bug 79062 is only partial (it gets the pass
to run but the warnings are still not issued).  I don't quite
understand what prevents the warning flag(s) from getting set
when -flto is used.  This seems to be a bigger problem than
just the sprintf pass not doing something just right.



I've never dug deeply in the LTO stuff, but I believe we stream the
compiler
flags, so it could be something there.



We do.


Alternately you might be running into a case where in LTO mode we
recreate
base types.  Look for a type equality tester that goes beyond just
testing
pointer equality.

ie, in LTO I think we'll create a type based on the streamed data, but I
also think we'll create various basic types.  Thus in LTO mode pointer
equality may not be sufficient.



We make sure that for most basic types we end up re-using them where
possible.
char_type_node is an example where that generally doesn't work because
it's
value depends on a command-line flag.



That answers the first part of the question of why the sprintf
pass wouldn't run (or do anything) with -flto.   With it fixed
(as in fold-const.c or tree-ssa-strlen.c as you suggested in
bug 79602) it runs and the optimization does its job, but no
warnings are issued.  The wan_foo_flags for warnings that are
enabled implicitly (e.g., by -Wall or -Wextra on the command
line) are clear.  There seem to be dependencies between warnings
in c.opt that ignore LTO (as a language), but even with those
corrected (i.e., with LTO added as a language to -Wformat and
-Wall) the flags are still clear when LTO runs.  Does that ring
any bells for you?


You can look at the lto_opts section (it's just a string) and see
that we seem to fail to pass through -Wall (or any warning option
I tried).  This is because

  /* Also drop all options that are handled by the driver as well,
 which includes things like -o and -v or -fhelp for example.
 We do not need those.  The only exception is -foffload option, if we
 write it in offload_lto section.  Also drop all diagnostic options.  */
  if ((cl_options[option->opt_index].flags & (CL_DRIVER|CL_WARNING))
  && (!lto_stream_offload_p || option->opt_index != OPT_foffload_))
continue;

which means you have to explicitely enable diagnostics you want at
link time at the moment.

If you want to change that you have to do some changes to lto-wrapper.c
as for example only pass through warning options that are set on all
input files (warning options are not kept per function).


Great, thanks for the pointer!  I might tackle that at some point.

Jeff, given that we now understand why the warnings are suppressed
(i.e., the root cause of the rest of bug 79062), are you okay with
treating that independently of this PR (bug 77671) and committing
this patch (including the two-line fix to enable the sprintf return
value optimization with LTO)?

Martin


[PATCH v8] add -fpatchable-function-entry=N,M option

2017-05-03 Thread Torsten Duwe
On Wed, Mar 01, 2017 at 01:35:52PM +, Richard Earnshaw (lists) wrote:
> >>
> >> How about --fpatchable-function-entry=?
> > 
> I haven't reviewed it yet.  I'm not really planning to spend any more
> time on this until stage1 re-opens.

So I guess this is about now? Here is version 8, which is functionally identical
to v6 (v7 tried to guard the gen_nop call, which you wrote isn't neccessary).
The longer names required some reformatting.

Torsten

gcc/c-family/ChangeLog
2017-05-03  Torsten Duwe  

* c-attribs.c (c_common_attribute_table): Add entry for
"patchable_function_entry".

gcc/lto/ChangeLog
2017-05-03  Torsten Duwe  

* lto-lang.c (lto_attribute_table): Add entry for
"patchable_function_entry".

gcc/ChangeLog
2017-05-03  Torsten Duwe  

* common.opt: Introduce -fpatchable-function-entry
command line option, and its variables function_entry_patch_area_size
and function_entry_patch_area_start.
* opts.c (common_handle_option): Add -fpatchable_function_entry_ case,
including a two-value parser.
* target.def (print_patchable_function_entry): New target hook.
* targhooks.h (default_print_patchable_function_entry): New function.
* targhooks.c (default_print_patchable_function_entry): Likewise.
* toplev.c (process_options): Switch off IPA-RA if
patchable function entries are being generated.
* varasm.c (assemble_start_function): Look at the
patchable-function-entry command line switch and current
function attributes and maybe generate NOP instructions by
calling the print_patchable_function_entry hook.
* doc/extend.texi: Document patchable_function_entry attribute.
* doc/invoke.texi: Document -fpatchable_function_entry
command line option.
* doc/tm.texi.in (TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY):
New target hook.
* doc/tm.texi: Likewise.

gcc/testsuite/ChangeLog
2017-05-03  Torsten Duwe  

* c-c++-common/attribute-patchable_function_entry-1.c: New test.

diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c
index f2a88e147ba..31137ce0433 100644
--- a/gcc/c-family/c-attribs.c
+++ b/gcc/c-family/c-attribs.c
@@ -139,6 +139,8 @@ static tree handle_bnd_variable_size_attribute (tree *, 
tree, tree, int, bool *)
 static tree handle_bnd_legacy (tree *, tree, tree, int, bool *);
 static tree handle_bnd_instrument (tree *, tree, tree, int, bool *);
 static tree handle_fallthrough_attribute (tree *, tree, tree, int, bool *);
+static tree handle_patchable_function_entry_attribute (tree *, tree, tree,
+  int, bool *);
 
 /* Table of machine-independent attributes common to all C-like languages.
 
@@ -345,6 +347,9 @@ const struct attribute_spec c_common_attribute_table[] =
  handle_bnd_instrument, false },
   { "fallthrough",   0, 0, false, false, false,
  handle_fallthrough_attribute, false },
+  { "patchable_function_entry",1, 2, true, false, false,
+ handle_patchable_function_entry_attribute,
+ false },
   { NULL, 0, 0, false, false, false, NULL, false }
 };
 
@@ -3173,3 +3178,10 @@ handle_fallthrough_attribute (tree *, tree name, tree, 
int,
   *no_add_attrs = true;
   return NULL_TREE;
 }
+
+static tree
+handle_patchable_function_entry_attribute (tree *, tree, tree, int, bool *)
+{
+  /* Nothing to be done here.  */
+  return NULL_TREE;
+}
diff --git a/gcc/common.opt b/gcc/common.opt
index b7ece0c73e1..1b698ef4fc5 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -163,6 +163,13 @@ bool flag_stack_usage_info = false
 Variable
 int flag_debug_asm
 
+; How many NOP insns to place at each function entry by default
+Variable
+HOST_WIDE_INT function_entry_patch_area_size
+
+; And how far the real asm entry point is into this area
+Variable
+HOST_WIDE_INT function_entry_patch_area_start
 
 ; Balance between GNAT encodings and standard DWARF to emit.
 Variable
@@ -2022,6 +2029,10 @@ fprofile-reorder-functions
 Common Report Var(flag_profile_reorder_functions)
 Enable function reordering that improves code placement.
 
+fpatchable-function-entry=
+Common Joined Optimization
+Insert NOP instructions at each function entry.
+
 frandom-seed
 Common Var(common_deferred_options) Defer
 
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 1255995eb78..d09ccd90c42 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -3083,6 +3083,25 @@ that affect more than one function.
 This attribute should be used for debugging purposes only.  It is not
 suitable in production code.
 
+@item patchable_function_entry
+@cindex @code{patchable_function_entry} function attribute
+@cindex extra NOP instructions at the function entry point
+In case the target's text segment can be made writable at run time by
+an

Re: [PATCH] have -Wformat-overflow handle -fexec-charset (PR 80503)

2017-05-03 Thread Martin Sebor

On 05/03/2017 08:22 AM, Christophe Lyon wrote:

Hi,


On 29 April 2017 at 19:56, Andreas Schwab  wrote:

On Apr 28 2017, Martin Sebor  wrote:


+void test_width_and_precision_out_of_range (char *d)
+{
+#if __LONG_MAX__ == 2147483647
+#  define   MAX_P1_STR "2147483648"
+#elif __LONG_MAX__ == 9223372036854775807
+#  define MAX_P1_STR "9223372036854775808"
+#endif
+
+  T ("%" MAX_P1_STR "i", 0);/* { dg-warning "width out of range" } */
+  /* { dg-warning "result to exceed .INT_MAX. " "" { target *-*-* } .-1 } */
+  T ("%." MAX_P1_STR "i", 0);   /* { dg-warning "precision out of range" } */


FAIL: gcc.dg/tree-ssa/builtin-sprintf-warn-18.c  (test for warnings, line 123)
FAIL: gcc.dg/tree-ssa/builtin-sprintf-warn-18.c  (test for warnings, line 125)
FAIL: gcc.dg/tree-ssa/builtin-sprintf-warn-18.c (test for excess errors)
Excess errors:
/daten/aranym/gcc/gcc-20170429/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-18.c:125:3:
 warning: '%.2147483648i' directive output of 2147483648 bytes causes result to 
exceed 'INT_MAX' [-Wformat-overflow=]

Andreas.


I've noticed the same errors on arm* targets, if it's easier to reproduce.


Thanks.  I committed a trivial fix for this on Monday
(https://gcc.gnu.org/ml/gcc-patches/2017-05/msg00036.html).
I don't see the failures in recent test results for the few
ILP32 targets I've checked so I'm hoping they're gone but if
they persist on some others please let me know.

Martin


Re: [PATCH] have -Wformat-overflow handle -fexec-charset (PR 80503)

2017-05-03 Thread Christophe Lyon
On 3 May 2017 at 16:54, Martin Sebor  wrote:
> On 05/03/2017 08:22 AM, Christophe Lyon wrote:
>>
>> Hi,
>>
>>
>> On 29 April 2017 at 19:56, Andreas Schwab  wrote:
>>>
>>> On Apr 28 2017, Martin Sebor  wrote:
>>>
 +void test_width_and_precision_out_of_range (char *d)
 +{
 +#if __LONG_MAX__ == 2147483647
 +#  define   MAX_P1_STR "2147483648"
 +#elif __LONG_MAX__ == 9223372036854775807
 +#  define MAX_P1_STR "9223372036854775808"
 +#endif
 +
 +  T ("%" MAX_P1_STR "i", 0);/* { dg-warning "width out of range" }
 */
 +  /* { dg-warning "result to exceed .INT_MAX. " "" { target *-*-* } .-1
 } */
 +  T ("%." MAX_P1_STR "i", 0);   /* { dg-warning "precision out of
 range" } */
>>>
>>>
>>> FAIL: gcc.dg/tree-ssa/builtin-sprintf-warn-18.c  (test for warnings, line
>>> 123)
>>> FAIL: gcc.dg/tree-ssa/builtin-sprintf-warn-18.c  (test for warnings, line
>>> 125)
>>> FAIL: gcc.dg/tree-ssa/builtin-sprintf-warn-18.c (test for excess errors)
>>> Excess errors:
>>>
>>> /daten/aranym/gcc/gcc-20170429/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-18.c:125:3:
>>> warning: '%.2147483648i' directive output of 2147483648 bytes causes result
>>> to exceed 'INT_MAX' [-Wformat-overflow=]
>>>
>>> Andreas.
>>
>>
>> I've noticed the same errors on arm* targets, if it's easier to reproduce.
>
>
> Thanks.  I committed a trivial fix for this on Monday
> (https://gcc.gnu.org/ml/gcc-patches/2017-05/msg00036.html).
> I don't see the failures in recent test results for the few
> ILP32 targets I've checked so I'm hoping they're gone but if
> they persist on some others please let me know.
>
Indeed, I confirm your commit r247444 fixed the error.
I didn't notice your message because it was in a different thread.

Thanks,

Christophe

> Martin


[PATCH] Remove am33_2.0-linux from config-list.mk

2017-05-03 Thread Jeff Law


am33_2.0-linux was removed from glibc some time ago.  Furthermore, for 
the testing done by config-list.mk builds, we get almost identical 
coverage from mn10300-elf.


In fact, mn10300-elf is actually better for testing purposes -- we can 
build newlib to do a first level test of the code generator and we can 
run the testsuite with the mn103 simulator if we were so inclined.  We 
can't do either with the -linux configuration.



This patch removes am33_2.0-linux from config-list.mk.  One could argue 
the port should be deprecated & removed.   I'm not sure if that would 
affect linux kernel builds which still supports the mn103 platform, so 
I'm leaving it as-is for now.


Installing on the trunk,
Jeff

diff --git a/contrib/ChangeLog b/contrib/ChangeLog
index 655e7fb7b84..c26ced0eeaa 100644
--- a/contrib/ChangeLog
+++ b/contrib/ChangeLog
@@ -1,3 +1,8 @@
+2007-05-03  Jeff Law  
+
+   * config-list.mk (am33_2.0-linux): Remove from list of targets
+   to build.
+
 2017-05-02  Jakub Jelinek  
 
* gennews (files): Add files for GCC 7.
diff --git a/contrib/config-list.mk b/contrib/config-list.mk
index 0edc8a4b813..fee08b02a2b 100644
--- a/contrib/config-list.mk
+++ b/contrib/config-list.mk
@@ -32,7 +32,7 @@ GCC_SRC_DIR=../../gcc
 # v850e1-elf is rejected by config.sub
 LIST = aarch64-elf aarch64-linux-gnu aarch64-rtems \
   alpha-linux-gnu alpha-freebsd6 alpha-netbsd alpha-openbsd \
-  alpha64-dec-vms alpha-dec-vms am33_2.0-linux \
+  alpha64-dec-vms alpha-dec-vms \
   arc-elf32OPT-with-cpu=arc600 arc-elf32OPT-with-cpu=arc700 \
   arc-linux-uclibcOPT-with-cpu=arc700 arceb-linux-uclibcOPT-with-cpu=arc700 \
   arm-wrs-vxworks arm-netbsdelf \


Re: [PATCH 2/7] clean up quoting problems - gcc (PR 80280 et al.)

2017-05-03 Thread Jeff Law

On 05/02/2017 08:37 PM, Martin Sebor wrote:

The attached patch adds missing quoting to diagnostic directives
in files in the top-level gcc/ directory.

Martin

gcc-80280-gcc.diff


gcc/ChangeLog:

* builtins.c (expand_builtin_object_size): Add missing quoting to
%D and like directives.
* hsa-gen.c (hsa_type_for_scalar_tree_type): Same.
(hsa_type_for_tree_type): Same.
(verify_function_arguments): Same.
* symtab.c (symbol_table::change_decl_assembler_name): Same.
* varasm.c (get_section): Same.
(mark_weak): Same.
OK.  In fact, go ahead and consider all changes of this nature 
pre-approved.  I think that covers #2-#7 in this series.


Jeff


Re: [PATCH v2] Generate reproducible output independently of the build-path

2017-05-03 Thread Ximin Luo
Joseph Myers:
> On Tue, 11 Apr 2017, Ximin Luo wrote:
> 
>> Copyright disclaimer
>> 
>>
>> I dedicate these patches to the public domain by waiving all of my rights to
>> the work worldwide under copyright law, including all related and neighboring
>> rights, to the extent allowed by law.
>>
>> See https://creativecommons.org/publicdomain/zero/1.0/legalcode for full 
>> text.
>>
>> Please let me know if the above is insufficient and I will be happy to sign 
>> any
>> relevant forms.
> 
> I believe the FSF wants its own disclaimer forms signed as evidence code 
> is in the public domain.  The process for getting disclaimer forms is to 
> complete 
> https://git.savannah.gnu.org/cgit/gnulib.git/plain/doc/Copyright/request-disclaim.changes
>  
> and then you should be sent a disclaimer form for disclaiming the 
> particular set of changes you have completed (if you then make further 
> significant changes afterwards, the disclaimer form would then need 
> completing for them as well).
> 

I've now done this, and the copyright clerk at the FSF has told me that this is 
complete on their side as well.

Did any of you get a chance to look at the patch yet?

X

-- 
GPG: ed25519/56034877E1F87C35
GPG: rsa4096/1318EFAC5FBBDBCE
https://github.com/infinity0/pubkeys.git


Re: [PATCH 1/7] enhance -Wformat to detect quoting problems (PR 80280 et al.)

2017-05-03 Thread Joseph Myers
On Tue, 2 May 2017, Martin Sebor wrote:

> In bug 80280 - Missing closing quote (%>) c/semantics.c and
> c/c-typeck.c, a translator points out one of a number of kinds
> of cosmetic problems that tend to come up late in development,
> during translation of GCC messages.  Other, arguably more minor,
> kinds of issues are caused by forgetting to use proper quoting
> when referencing tree nodes, such as %D or %T.
> 
> To help avoid these kinds of bugs, the attached patch enhances
> the -Wformat checker to detect these common quoting issues and
> report them as warnings.

As I see it, there are four kinds of format specifiers in this context:

* Specifiers that have no special interaction with quoting and may be used 
either quoted or unquoted.  For example, %d, %s and %E.  (%E is only of 
that kind when used for expressions - when hopefully it should only be 
used for simple expressions such as constants.  Uses for identifiers 
should be quoted; cases where there can be complex expressions should be 
replaced by use of location ranges.  Ideally identifiers and expressions 
should have different formats, so these cases can be distinguished and so 
identifiers can end up with a static type other than "tree".)

* Specifiers that should be quoted.  For example, %D.

* Specifiers that open quoting (%<).

* Specifiers that close quoting (%>).

(In addition, the q flag serves to quote the specifier it's applied to.)

> The remaining six patches in the series correct the problems
> highlighted by the warning and get GCC to bootstrap and pass
> regression tests on x86_64.  I suspect additional fixes will
> be needed to get GCC to bootstrap on other targets.  I'll do
> powerpc64le next but besides a general review I'm looking for
> suggestions how to do the same cleanup on all the other targets
> with the least disruption. (E.g., if there's a way to roll out
> a warning one target at a time I could introduce it under
> a suboption of -Wformat and enable the subobption with -Werror
> only on the targets that have already been verified.)

Use contrib/config-list.mk, with a native compiler with this patch in the 
PATH, to test building compilers for many configurations.  (No doubt 
you'll also find existing build issues, which may or may not be filed in 
Bugzilla.)

> +  /* Diagnose nested or unmatched quoting directives such as GCC's
> +  "%<...%<" and "%>...%>".  As a special case, a directive can
> +  be considered to both begin and end quoting (e.g., GCC's %E).
> +  Such a directive is recognized but not diagosed.  */

I don't think it makes conceptual sense to consider %E to both begin and 
end quoting.  I'd expect %E to have exactly the same begin_quote and 
end_quote (or whatever) data as %d and %s, because it has the same 
properties (may be used quoted or unquoted).

> diff --git a/gcc/c-family/c-format.h b/gcc/c-family/c-format.h
> index 13ca8ea..8932861 100644
> --- a/gcc/c-family/c-format.h
> +++ b/gcc/c-family/c-format.h
> @@ -132,6 +132,11 @@ struct format_type_detail
>  struct format_char_info
>  {
>const char *format_chars;
> +  /* Strings of FORMAT_CHARS characters that begin and end quoting,
> + respectively, and pairs of which should be matched in the same
> + format string.  NULL when no such pairs exist.  */
> +  const char *quote_begin_chars;
> +  const char *quote_end_chars;

I don't think this comment is sufficiently detailed to make it obvious 
what should appear in each field for each of the four kinds of format 
specifiers I enumerated.  The best I can reverse-engineer from the code 
is: NULL for specifiers that may be used either quoted or unquoted *or* 
listing those specifiers in both quote_begin_chars and quote_end_chars 
(but I think the option of listing them in both should be removed as 
conceptually wrong); if the character is an opening or closing quote, list 
it in the appropriate one; if it must be used quoted, but isn't a quote 
itself, both strings must be non-NULL but it must not be listed in either.

Whether that's right or wrong, the comment needs to make the rules clear.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH 5/7] clean up quoting problems - c-family (PR 80280 et al.)

2017-05-03 Thread Joseph Myers
On Tue, 2 May 2017, Martin Sebor wrote:

> +  inform (loc, "in the expansion of concept %qE %qS", check, sub);

Are you sure about this (two consecutive quoted strings, open quote of %qS 
following closing quote of %qE) or should it be a single quoted string 
%<%E %S%>?

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH GCC8][03/33]Refactor invariant variable/expression handling

2017-05-03 Thread Bin.Cheng
On Wed, May 3, 2017 at 3:41 PM, Bin.Cheng  wrote:
> On Wed, May 3, 2017 at 3:38 PM, Christophe Lyon
>  wrote:
>> Hi Bin,
>>
>>
>> On 24 April 2017 at 12:26, Richard Biener  wrote:
>>> On Tue, Apr 18, 2017 at 12:38 PM, Bin Cheng  wrote:
 Hi,
 This patch refactors how invariant variable/expressions are handled.  Now 
 they are
 recorded in the same kind data structure and handled similarly, which 
 makes code
 easier to understand.

 Is it OK?
>>>
>>
>> This patch (r247512) caused regression on some arm
>> targets/cpu/fpu/runtestflags combinations:
>> FAIL:gcc.target/arm/ivopts.c object-size text <= 32
>>
>> See: 
>> http://people.linaro.org/~christophe.lyon/cross-validation/gcc/trunk/247512/report-build-info.html
> Hi Christophe,
> Thanks very much for reporting.  Seems it's more than pure
> refactoring.  I will investigate it.
Hi,
Though I tried to separate patch as much as possible, there are still
dependencies in between.  This regression will be resolved by later
patches.

Thanks,
bin
>
> Thanks,
> bin
>>
>> Thanks,
>>
>> Christophe
>>
>>
>>> Ok.
>>>
>>> Richard.
>>>
 Thanks,
 bin

 2017-04-11  Bin Cheng  

 * tree-ssa-loop-ivopts.c (struct cost_pair): Rename depends_on to
 inv_vars.  Add inv_exprs.
 (struct iv_cand): Rename depends_on to inv_vars.
 (struct ivopts_data): Rename max_inv_id/n_invariant_uses to
 max_inv_var_id/n_inv_var_uses.  Move max_inv_expr_id around.
 Refactor field used_inv_exprs from has_map to array 
 n_inv_expr_uses.
 (dump_cand): Dump inv_vars.
 (tree_ssa_iv_optimize_init): Support inv_vars and inv_exprs.
 (record_invariant, find_depends, add_candidate_1): Ditto.
 (set_group_iv_cost, force_var_cost): Ditto.
 (split_address_cost, ptr_difference_cost, difference_cost): Ditto.
 (get_computation_cost_at, get_computation_cost): Ditto.
 (determine_group_iv_cost_generic): Ditto.
 (determine_group_iv_cost_address): Ditto.
 (determine_group_iv_cost_cond, autoinc_possible_for_pair): Ditto.
 (determine_group_iv_costs): Ditto.
 (iv_ca_recount_cost): Update call to ivopts_global_cost_for_size.
 (iv_ca_set_remove_invariants): Renamed to ...
 (iv_ca_set_remove_invs): ... this.  Support inv_vars and inv_exprs.
 (iv_ca_set_no_cp): Use iv_ca_set_remove_invs.
 (iv_ca_set_add_invariants):  Renamed to ...
 (iv_ca_set_add_invs): ... this.  Support inv_vars and inv_exprs.
 (iv_ca_set_cp): Use iv_ca_set_add_invs.
 (iv_ca_has_deps): Support inv_vars and inv_exprs.
 (iv_ca_new, iv_ca_free, iv_ca_dump, free_loop_data): Ditto.
 (create_new_ivs): Remove useless dump.

 gcc/testsuite/ChangeLog
 2017-04-11  Bin Cheng  

 * g++.dg/tree-ssa/ivopts-3.C: Adjust test string.


Re: [PATCH] warn on mem calls modifying objects of non-trivial types (PR 80560)

2017-05-03 Thread Martin Sebor

On 04/30/2017 05:39 PM, Joseph Myers wrote:

On Sat, 29 Apr 2017, Martin Sebor wrote:


+The safe way to either initialize or "reset" objects of non-trivial


Should use TeX quotes in Texinfo files, ``reset''.


Heh :) I wrestled with Emacs to get rid of those,  It kept replacing
my plain quotes with the TeX kind but in the end I won.  Sounds like
I should have trusted it.  Let me restore it in the follow-on patch.

Martin


Fix bootstrap issue with gcc 4.1

2017-05-03 Thread Jan Hubicka
Hi,
my change to sreals makes GCC to be miscompiled with GCC 4.1 and -O0.  This is
related to fact that using sreal implies a non-trivial constructor and thus
ggc_cleared_alloc is no longer standard compliant.  I however do not quite 
understand
why GCC 4.1 manages to misoptimize this code but I have checked that this helps
to fix the issue and hopefully will re-start our periodic testers.

Bootstrapped x86_64-linux and comitted.

Honza

Index: ipa-inline.h
===
--- ipa-inline.h(revision 247549)
+++ ipa-inline.h(working copy)
@@ -175,6 +175,16 @@ struct GTY(()) inline_summary
   int growth;
   /* Number of SCC on the beginning of inlining process.  */
   int scc_no;
+
+  void inline_summary ()
+: estimated_self_stack_size (0), self_size (0), self_time (0), min_size 
(0),
+  inlinable (false), contains_cilk_spawn (false), single_caller (false),
+  fp_expressions (false), estimated_stack_size (false),
+  stack_frame_offset (false), time (0), size (0), conds (NULL),
+  entry (NULL), loop_iterations (NULL), loop_stride (NULL),
+  array_ined (NULL), growth (0), scc_no (0)
+{
+}
 };
 
 class GTY((user)) inline_summary_t: public function_summary 
@@ -185,7 +195,7 @@ public:
 
   static inline_summary_t *create_ggc (symbol_table *symtab)
   {
-struct inline_summary_t *summary = new (ggc_cleared_alloc 
 ())
+struct inline_summary_t *summary = new (ggc_alloc  ())
   inline_summary_t(symtab, true);
 summary->disable_insertion_hook ();
 return summary;


Re: Fix bootstrap issue with gcc 4.1

2017-05-03 Thread Jakub Jelinek
On Wed, May 03, 2017 at 06:18:08PM +0200, Jan Hubicka wrote:
> Hi,
> my change to sreals makes GCC to be miscompiled with GCC 4.1 and -O0.  This is
> related to fact that using sreal implies a non-trivial constructor and thus
> ggc_cleared_alloc is no longer standard compliant.  I however do not quite 
> understand
> why GCC 4.1 manages to misoptimize this code but I have checked that this 
> helps
> to fix the issue and hopefully will re-start our periodic testers.

Is store-merging pass able to optimize that back into reasonable code
(sure, not into ggc_cleared_alloc)?

Jakub


RE: [PATCH][x86] Add missing intrinsics for ADD[SD,SS] and SUB[SD,SS]

2017-05-03 Thread Peryt, Sebastian
Thank you!

Sebastian

-Original Message-
From: Uros Bizjak [mailto:ubiz...@gmail.com] 
Sent: Tuesday, May 2, 2017 3:08 PM
To: Peryt, Sebastian 
Cc: gcc-patches@gcc.gnu.org; kirill.yuk...@gmail.com
Subject: Re: [PATCH][x86] Add missing intrinsics for ADD[SD,SS] and SUB[SD,SS]

On Tue, May 2, 2017 at 11:39 AM, Peryt, Sebastian  
wrote:
> Hi,
> Can you please commit it for me?

Done.

Uros.


[PATCH 1/4][PR tree-optimization/78496] Don't simplify conditionals too early in VRP

2017-05-03 Thread Jeff Law

This is the first of 3-5 patches to address pr78496.

The goal of these patches is to catch jump threads earlier in the 
pipeline to avoid undesirable behavior in PRE and more generally be able 
to exploit the secondary opportunities exposed by jump threading.


One of the more serious issues I found while investigating 78496 was VRP 
failing to find what should have been obvious jump threads.  The 
fundamental issue is VRP will simplify conditionals which are fed by a 
typecast prior to jump threading.   So something like this:


x = (typecast) y;
if (x == 42)

Can often be transformed into:

if (y == 42)


The problem is any ASSERT_EXPRS after the conditional will reference "x" 
rather than "y".  That in turn makes it impossible for VRP to use those 
ASSERT_EXPRs to thread later jumps that use x == 



More concretely consider this gimple code:


;;   basic block 5, loop depth 0, count 0, freq 1, maybe hot
;;prev block 4, next block 12, flags: (NEW, REACHABLE, VISITED)
;;pred:   3 [50.0%]  (TRUE_VALUE,EXECUTABLE)
;;4 [100.0%]  (FALLTHRU,EXECUTABLE)
  # iftmp.0_2 = PHI <1(3), 0(4)>
  in_loop_7 = (unsigned char) iftmp.0_2;
  if (in_loop_7 != 0)
goto ; [33.00%]
  else
goto ; [67.00%]

;;succ:   6 [33.0%]  (TRUE_VALUE,EXECUTABLE)
;;12 [67.0%]  (FALSE_VALUE,EXECUTABLE)

;;   basic block 12, loop depth 0, count 0, freq 6700, maybe hot
;;prev block 5, next block 6, flags: (NEW)
;;pred:   5 [67.0%]  (FALSE_VALUE,EXECUTABLE)
  in_loop_15 = ASSERT_EXPR ;
  goto ; [100.00%]
;;succ:   7 [100.0%]  (FALLTHRU)

;;   basic block 6, loop depth 0, count 0, freq 3300, maybe hot
;;prev block 12, next block 7, flags: (NEW, REACHABLE, VISITED)
;;pred:   5 [33.0%]  (TRUE_VALUE,EXECUTABLE)
  in_loop_14 = ASSERT_EXPR ;
  simple_iv ();
;;succ:   7 [100.0%]  (FALLTHRU,EXECUTABLE)

And later we have:

;;   basic block 9, loop depth 0, count 0, freq 8476, maybe hot
;;prev block 8, next block 10, flags: (NEW, REACHABLE, VISITED)
;;pred:   7 [84.8%]  (FALSE_VALUE,EXECUTABLE)
  if (in_loop_7 == 0)
goto ; [36.64%]
  else
goto ; [63.36%]

VRP knows it can replace the uses of in_loop_7 in the conditionals in 
blocks 5 and 9 with iftmp.0_2 and happily does so *before* jump 
threading (but well after ASSERT_EXPR insertion).


As a result VRP is unable to utilize the ASSERT_EXPRs in blocks 12 and 6 
(which reference in_loop_7) to thread the jump at bb9 (which now 
references iftmp.0_2).



The cases in pr78496 are slightly more complex, but boil down to the 
same core issue -- simplifying the conditional too early.


Thankfully this is easy to fix.  We just split the conditional 
simplification into two steps so that the transformation noted above 
occurs after jump threading (the other simplifications we want to occur 
before jump threading).


This allows VRP1 to pick up 27 missed jump threads in the testcase from 
78496.  It could well be enough to address 78496, but since we don't 
have a solid description of the desired end result I won't consider 
78496 fixed quite yet as there's significant further improvements we can 
make.


Bootstrapped and regression tested on x86_64-linux-gnu.  Installing on 
the trunk.


Jeff



Re: [PATCH 1/4][PR tree-optimization/78496] Don't simplify conditionals too early in VRP

2017-05-03 Thread Jeff Law

[ With the patch attached... ]

On 05/03/2017 10:31 AM, Jeff Law wrote:

This is the first of 3-5 patches to address pr78496.

The goal of these patches is to catch jump threads earlier in the 
pipeline to avoid undesirable behavior in PRE and more generally be able 
to exploit the secondary opportunities exposed by jump threading.


One of the more serious issues I found while investigating 78496 was VRP 
failing to find what should have been obvious jump threads.  The 
fundamental issue is VRP will simplify conditionals which are fed by a 
typecast prior to jump threading.   So something like this:


x = (typecast) y;
if (x == 42)

Can often be transformed into:

if (y == 42)


The problem is any ASSERT_EXPRS after the conditional will reference "x" 
rather than "y".  That in turn makes it impossible for VRP to use those 
ASSERT_EXPRs to thread later jumps that use x == 



More concretely consider this gimple code:


;;   basic block 5, loop depth 0, count 0, freq 1, maybe hot
;;prev block 4, next block 12, flags: (NEW, REACHABLE, VISITED)
;;pred:   3 [50.0%]  (TRUE_VALUE,EXECUTABLE)
;;4 [100.0%]  (FALLTHRU,EXECUTABLE)
   # iftmp.0_2 = PHI <1(3), 0(4)>
   in_loop_7 = (unsigned char) iftmp.0_2;
   if (in_loop_7 != 0)
 goto ; [33.00%]
   else
 goto ; [67.00%]

;;succ:   6 [33.0%]  (TRUE_VALUE,EXECUTABLE)
;;12 [67.0%]  (FALSE_VALUE,EXECUTABLE)

;;   basic block 12, loop depth 0, count 0, freq 6700, maybe hot
;;prev block 5, next block 6, flags: (NEW)
;;pred:   5 [67.0%]  (FALSE_VALUE,EXECUTABLE)
   in_loop_15 = ASSERT_EXPR ;
   goto ; [100.00%]
;;succ:   7 [100.0%]  (FALLTHRU)

;;   basic block 6, loop depth 0, count 0, freq 3300, maybe hot
;;prev block 12, next block 7, flags: (NEW, REACHABLE, VISITED)
;;pred:   5 [33.0%]  (TRUE_VALUE,EXECUTABLE)
   in_loop_14 = ASSERT_EXPR ;
   simple_iv ();
;;succ:   7 [100.0%]  (FALLTHRU,EXECUTABLE)

And later we have:

;;   basic block 9, loop depth 0, count 0, freq 8476, maybe hot
;;prev block 8, next block 10, flags: (NEW, REACHABLE, VISITED)
;;pred:   7 [84.8%]  (FALSE_VALUE,EXECUTABLE)
   if (in_loop_7 == 0)
 goto ; [36.64%]
   else
 goto ; [63.36%]

VRP knows it can replace the uses of in_loop_7 in the conditionals in 
blocks 5 and 9 with iftmp.0_2 and happily does so *before* jump 
threading (but well after ASSERT_EXPR insertion).


As a result VRP is unable to utilize the ASSERT_EXPRs in blocks 12 and 6 
(which reference in_loop_7) to thread the jump at bb9 (which now 
references iftmp.0_2).



The cases in pr78496 are slightly more complex, but boil down to the 
same core issue -- simplifying the conditional too early.


Thankfully this is easy to fix.  We just split the conditional 
simplification into two steps so that the transformation noted above 
occurs after jump threading (the other simplifications we want to occur 
before jump threading).


This allows VRP1 to pick up 27 missed jump threads in the testcase from 
78496.  It could well be enough to address 78496, but since we don't 
have a solid description of the desired end result I won't consider 
78496 fixed quite yet as there's significant further improvements we can 
make.


Bootstrapped and regression tested on x86_64-linux-gnu.  Installing on 
the trunk.


Jeff



diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 92a4e395ba8..32cc81978df 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2017-05-03  Jeff Law  
+
+   PR tree-optimization/78496
+   * tree-vrp.c (simplify_cond_using_ranges_1): Renamed
+   from simplify_cond_using_ranges.  Split off code to walk
+   backwards through casts into ...
+   (simplify_cond_using_ranges_2): New function.
+   (simplify_stmt_using_ranges): Call simplify_cond_using_ranges_1.
+   (execute_vrp): After identifying jump threads, call
+   simplify_cond_using_ranges_2.
+
 2017-05-03  Jeff Downs  
Rainer Orth  
 
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 596468d33e9..55a44e4635a 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2017-05-03  Jeff Law  
+
+   PR tree-optimization/78496
+   * gcc.dg/tree-ssa/ssa-thread-15.c: New test.
+
 2017-05-03  Uros Bizjak  
 
* g++.dg/lto/pr79671_0.C (foo): Fix asm constraints.
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-15.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-15.c
new file mode 100644
index 000..f73268cacbb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-15.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */ 
+/* { dg-options "-O2 -fdump-tree-vrp1" } */
+
+/* We should thread the if (!in_loop) completely leaving
+   just two conditionals.  */
+/* { dg-final { scan-tree-dump-times "if \\(" 2 "vrp1" } } */
+
+
+union tree_node;
+typedef union tree_node *tree;
+
+enum size_type_kind
+{
+  SIZETYPE,
+  SSIZETYPE,
+  BITSIZETYPE,
+  SBITSIZETYPE,
+ 

Re: Fix bootstrap issue with gcc 4.1

2017-05-03 Thread Jan Hubicka
> On Wed, May 03, 2017 at 06:18:08PM +0200, Jan Hubicka wrote:
> > Hi,
> > my change to sreals makes GCC to be miscompiled with GCC 4.1 and -O0.  This 
> > is
> > related to fact that using sreal implies a non-trivial constructor and thus
> > ggc_cleared_alloc is no longer standard compliant.  I however do not quite 
> > understand
> > why GCC 4.1 manages to misoptimize this code but I have checked that this 
> > helps
> > to fix the issue and hopefully will re-start our periodic testers.
> 
> Is store-merging pass able to optimize that back into reasonable code
> (sure, not into ggc_cleared_alloc)?

It is still a sequence of assignments to 0 in .optimized pass.

Honza
> 
>   Jakub


Re: Fix bootstrap issue with gcc 4.1

2017-05-03 Thread Richard Biener
On May 3, 2017 6:22:14 PM GMT+02:00, Jakub Jelinek  wrote:
>On Wed, May 03, 2017 at 06:18:08PM +0200, Jan Hubicka wrote:
>> Hi,
>> my change to sreals makes GCC to be miscompiled with GCC 4.1 and -O0.
> This is
>> related to fact that using sreal implies a non-trivial constructor
>and thus
>> ggc_cleared_alloc is no longer standard compliant.  I however do not
>quite understand
>> why GCC 4.1 manages to misoptimize this code but I have checked that
>this helps
>> to fix the issue and hopefully will re-start our periodic testers.
>
>Is store-merging pass able to optimize that back into reasonable code
>(sure, not into ggc_cleared_alloc)?

Whether it is or not, the previous code was buggy.  The zeroing does not 
prevail until after the object construction begins (not sure whether for PODs 
this would be different).

Richard.

>   Jakub



Re: Fix bootstrap issue with gcc 4.1

2017-05-03 Thread Jakub Jelinek
On Wed, May 03, 2017 at 06:39:18PM +0200, Jan Hubicka wrote:
> > On Wed, May 03, 2017 at 06:18:08PM +0200, Jan Hubicka wrote:
> > > Hi,
> > > my change to sreals makes GCC to be miscompiled with GCC 4.1 and -O0.  
> > > This is
> > > related to fact that using sreal implies a non-trivial constructor and 
> > > thus
> > > ggc_cleared_alloc is no longer standard compliant.  I however do not 
> > > quite understand
> > > why GCC 4.1 manages to misoptimize this code but I have checked that this 
> > > helps
> > > to fix the issue and hopefully will re-start our periodic testers.
> > 
> > Is store-merging pass able to optimize that back into reasonable code
> > (sure, not into ggc_cleared_alloc)?
> 
> It is still a sequence of assignments to 0 in .optimized pass.

Including the bool fields that are adjacent?

Jakub


Re: Fix bootstrap issue with gcc 4.1

2017-05-03 Thread Jakub Jelinek
On Wed, May 03, 2017 at 06:44:46PM +0200, Richard Biener wrote:
> On May 3, 2017 6:22:14 PM GMT+02:00, Jakub Jelinek  wrote:
> >On Wed, May 03, 2017 at 06:18:08PM +0200, Jan Hubicka wrote:
> >> Hi,
> >> my change to sreals makes GCC to be miscompiled with GCC 4.1 and -O0.
> > This is
> >> related to fact that using sreal implies a non-trivial constructor
> >and thus
> >> ggc_cleared_alloc is no longer standard compliant.  I however do not
> >quite understand
> >> why GCC 4.1 manages to misoptimize this code but I have checked that
> >this helps
> >> to fix the issue and hopefully will re-start our periodic testers.
> >
> >Is store-merging pass able to optimize that back into reasonable code
> >(sure, not into ggc_cleared_alloc)?
> 
> Whether it is or not, the previous code was buggy.  The zeroing does not
> prevail until after the object construction begins (not sure whether for
> PODs this would be different).

Sure, I'm not questioning the patch, just wondering if we shouldn't improve
store-merging further (we want to do it anyway for e.g. bitop adjacent
operations etc.).

Jakub


Re: [RFC PATCH 0/3] Call summary class

2017-05-03 Thread Martin Jambor
On Mon, Feb 27, 2017 at 05:36:48PM +0100, Martin Jambor wrote:
> Hello,
> 
> the patch sequence in this thread adds a call_summary class, which is
> analogous to function_summary we already have but which gathers
> information about call graph edges, rather than nodes.
> 
> The first patch implements the class itself, the second modifies
> ipa-prop.[ch] and ipa-cp.c to use it instead of a vector indexed by
> edge->uid and the third patch is a semi-related cleanup I spotted
> along the way.
> 
> I have LTO-bootstrapped and tested the patches on x86_64-linux and
> successfully LTO-built Mozilla Firefox with it.  I'll be grateful for
> any comments, otherwise I'll queue the series for the next stage1.
> 

After Honza re-approved the patches in person, I have committed them
as revisions 247557, 247558 and 247559.

Thanks,

Martin

> 
> Martin Jambor (3):
>   call_summary to keep info about cgraph_edges
>   Use call_summary in ipa-prop and ipa-cp
>   Remove ipa_update_after_lto_read
> 
>  gcc/ipa-cp.c  |   4 -
>  gcc/ipa-inline-analysis.c |   2 +-
>  gcc/ipa-inline.c  |   3 -
>  gcc/ipa-profile.c |   2 +-
>  gcc/ipa-prop.c|  82 --
>  gcc/ipa-prop.h|  60 +
>  gcc/symbol-summary.h  | 214 
> +-
>  7 files changed, 273 insertions(+), 94 deletions(-)
> 
> -- 
> 2.11.1


Re: Fix bootstrap issue with gcc 4.1

2017-05-03 Thread Richard Biener
On May 3, 2017 6:46:05 PM GMT+02:00, Jakub Jelinek  wrote:
>On Wed, May 03, 2017 at 06:44:46PM +0200, Richard Biener wrote:
>> On May 3, 2017 6:22:14 PM GMT+02:00, Jakub Jelinek 
>wrote:
>> >On Wed, May 03, 2017 at 06:18:08PM +0200, Jan Hubicka wrote:
>> >> Hi,
>> >> my change to sreals makes GCC to be miscompiled with GCC 4.1 and
>-O0.
>> > This is
>> >> related to fact that using sreal implies a non-trivial constructor
>> >and thus
>> >> ggc_cleared_alloc is no longer standard compliant.  I however do
>not
>> >quite understand
>> >> why GCC 4.1 manages to misoptimize this code but I have checked
>that
>> >this helps
>> >> to fix the issue and hopefully will re-start our periodic testers.
>> >
>> >Is store-merging pass able to optimize that back into reasonable
>code
>> >(sure, not into ggc_cleared_alloc)?
>> 
>> Whether it is or not, the previous code was buggy.  The zeroing does
>not
>> prevail until after the object construction begins (not sure whether
>for
>> PODs this would be different).
>
>Sure, I'm not questioning the patch, just wondering if we shouldn't
>improve
>store-merging further (we want to do it anyway for e.g. bitop adjacent
>operations etc.).

We definitely want to do that.  It should also 'nicely' merge with bswap for 
gathering the load side of a piecewise memory to memory copy.

Richard.

>
>   Jakub



Re: [PATCH 1/4][PR tree-optimization/78496] Don't simplify conditionals too early in VRP

2017-05-03 Thread Bin.Cheng
On Wed, May 3, 2017 at 5:32 PM, Jeff Law  wrote:
> [ With the patch attached... ]
>
>
> On 05/03/2017 10:31 AM, Jeff Law wrote:
>>
>> This is the first of 3-5 patches to address pr78496.
>>
>> The goal of these patches is to catch jump threads earlier in the pipeline
>> to avoid undesirable behavior in PRE and more generally be able to exploit
>> the secondary opportunities exposed by jump threading.
>>
>> One of the more serious issues I found while investigating 78496 was VRP
>> failing to find what should have been obvious jump threads.  The fundamental
>> issue is VRP will simplify conditionals which are fed by a typecast prior to
>> jump threading.   So something like this:
>>
>> x = (typecast) y;
>> if (x == 42)
>>
>> Can often be transformed into:
>>
>> if (y == 42)
>>
>>
>> The problem is any ASSERT_EXPRS after the conditional will reference "x"
>> rather than "y".  That in turn makes it impossible for VRP to use those
>> ASSERT_EXPRs to thread later jumps that use x == 
>>
>>
>> More concretely consider this gimple code:
>>
>>
>> ;;   basic block 5, loop depth 0, count 0, freq 1, maybe hot
>> ;;prev block 4, next block 12, flags: (NEW, REACHABLE, VISITED)
>> ;;pred:   3 [50.0%]  (TRUE_VALUE,EXECUTABLE)
>> ;;4 [100.0%]  (FALLTHRU,EXECUTABLE)
>># iftmp.0_2 = PHI <1(3), 0(4)>
>>in_loop_7 = (unsigned char) iftmp.0_2;
>>if (in_loop_7 != 0)
>>  goto ; [33.00%]
>>else
>>  goto ; [67.00%]
>>
>> ;;succ:   6 [33.0%]  (TRUE_VALUE,EXECUTABLE)
>> ;;12 [67.0%]  (FALSE_VALUE,EXECUTABLE)
>>
>> ;;   basic block 12, loop depth 0, count 0, freq 6700, maybe hot
>> ;;prev block 5, next block 6, flags: (NEW)
>> ;;pred:   5 [67.0%]  (FALSE_VALUE,EXECUTABLE)
>>in_loop_15 = ASSERT_EXPR ;
>>goto ; [100.00%]
>> ;;succ:   7 [100.0%]  (FALLTHRU)
>>
>> ;;   basic block 6, loop depth 0, count 0, freq 3300, maybe hot
>> ;;prev block 12, next block 7, flags: (NEW, REACHABLE, VISITED)
>> ;;pred:   5 [33.0%]  (TRUE_VALUE,EXECUTABLE)
>>in_loop_14 = ASSERT_EXPR ;
>>simple_iv ();
>> ;;succ:   7 [100.0%]  (FALLTHRU,EXECUTABLE)
>>
>> And later we have:
>>
>> ;;   basic block 9, loop depth 0, count 0, freq 8476, maybe hot
>> ;;prev block 8, next block 10, flags: (NEW, REACHABLE, VISITED)
>> ;;pred:   7 [84.8%]  (FALSE_VALUE,EXECUTABLE)
>>if (in_loop_7 == 0)
>>  goto ; [36.64%]
>>else
>>  goto ; [63.36%]
>>
>> VRP knows it can replace the uses of in_loop_7 in the conditionals in
>> blocks 5 and 9 with iftmp.0_2 and happily does so *before* jump threading
>> (but well after ASSERT_EXPR insertion).
>>
>> As a result VRP is unable to utilize the ASSERT_EXPRs in blocks 12 and 6
>> (which reference in_loop_7) to thread the jump at bb9 (which now references
>> iftmp.0_2).
>>
>>
>> The cases in pr78496 are slightly more complex, but boil down to the same
>> core issue -- simplifying the conditional too early.
>>
>> Thankfully this is easy to fix.  We just split the conditional
>> simplification into two steps so that the transformation noted above occurs
>> after jump threading (the other simplifications we want to occur before jump
>> threading).
>>
>> This allows VRP1 to pick up 27 missed jump threads in the testcase from
>> 78496.  It could well be enough to address 78496, but since we don't have a
>> solid description of the desired end result I won't consider 78496 fixed
>> quite yet as there's significant further improvements we can make.
>>
>> Bootstrapped and regression tested on x86_64-linux-gnu.  Installing on the
>> trunk.
>>
>> Jeff
>>
>
>
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index 92a4e395ba8..32cc81978df 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,14 @@
> +2017-05-03  Jeff Law  
> +
> +   PR tree-optimization/78496
> +   * tree-vrp.c (simplify_cond_using_ranges_1): Renamed
> +   from simplify_cond_using_ranges.  Split off code to walk
> +   backwards through casts into ...
> +   (simplify_cond_using_ranges_2): New function.
> +   (simplify_stmt_using_ranges): Call simplify_cond_using_ranges_1.
> +   (execute_vrp): After identifying jump threads, call
> +   simplify_cond_using_ranges_2.
> +
>  2017-05-03  Jeff Downs  
> Rainer Orth  
>
> diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
> index 596468d33e9..55a44e4635a 100644
> --- a/gcc/testsuite/ChangeLog
> +++ b/gcc/testsuite/ChangeLog
> @@ -1,3 +1,8 @@
> +2017-05-03  Jeff Law  
> +
> +   PR tree-optimization/78496
> +   * gcc.dg/tree-ssa/ssa-thread-15.c: New test.
> +
>  2017-05-03  Uros Bizjak  
>
> * g++.dg/lto/pr79671_0.C (foo): Fix asm constraints.
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-15.c
> b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-15.c
> new file mode 100644
> index 000..f73268cacbb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-15.c
> @@ -0,0 +1,51 @@
> +/* { dg-do

[committed] Fix typo in common.opt

2017-05-03 Thread David Malcolm
Successfully bootstrapped®rtested on x86_64-pc-linux-gnu.

Committed to trunk as r247562.

gcc/ChangeLog:
* common.opt (fdiagnostics-parseable-fixits): Fix typo.
---
 gcc/common.opt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/common.opt b/gcc/common.opt
index 4021622..5817407 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1225,7 +1225,7 @@ Enum(diagnostic_color_rule) String(auto) 
Value(DIAGNOSTICS_COLOR_AUTO)
 
 fdiagnostics-parseable-fixits
 Common Var(flag_diagnostics_parseable_fixits)
-Print fixit hints in machine-readable form.
+Print fix-it hints in machine-readable form.
 
 fdiagnostics-generate-patch
 Common Var(flag_diagnostics_generate_patch)
-- 
1.8.5.3



Re: C++ PATCH to fix ICE in replace_placeholders_r (PR c++/79937)

2017-05-03 Thread Jason Merrill
On Tue, Apr 25, 2017 at 12:17 PM, Marek Polacek  wrote:
> On Fri, Apr 07, 2017 at 03:27:36PM -0400, Jason Merrill wrote:
>> On Fri, Mar 24, 2017 at 12:22 PM, Marek Polacek  wrote:
>> > On Thu, Mar 23, 2017 at 05:09:58PM -0400, Jason Merrill wrote:
>> >> On Thu, Mar 23, 2017 at 4:34 PM, Marek Polacek  wrote:
>> >> > On Tue, Mar 14, 2017 at 02:34:30PM -0400, Jason Merrill wrote:
>> >> >> On Tue, Mar 14, 2017 at 2:33 PM, Jason Merrill  
>> >> >> wrote:
>> >> >> > On Tue, Mar 7, 2017 at 12:10 PM, Marek Polacek  
>> >> >> > wrote:
>> >> >> >> In this testcase we have
>> >> >> >> C c = bar (X{1});
>> >> >> >> which store_init_value sees as
>> >> >> >> c = TARGET_EXPR > >> >> >> .n=(&)->i}>)>
>> >> >> >> i.e. we're initializing "c" with a TARGET_EXPR.  We call 
>> >> >> >> replace_placeholders
>> >> >> >> that walks the whole tree to substitute the placeholders.  
>> >> >> >> Eventually we find
>> >> >> >> the nested  but that's for another 
>> >> >> >> object, so we
>> >> >> >> crash.  Seems that we shouldn't have stepped into the second 
>> >> >> >> TARGET_EXPR at
>> >> >> >> all; it has nothing to with "c", it's bar's argument.
>> >> >> >>
>> >> >> >> It occurred to me that we shouldn't step into CALL_EXPRs and leave 
>> >> >> >> the
>> >> >> >> placeholders in function arguments to cp_gimplify_init_expr which 
>> >> >> >> calls
>> >> >> >> replace_placeholders for constructors.  Not sure if it's enough to 
>> >> >> >> handle
>> >> >> >> CALL_EXPRs like this, anything else?
>> >> >> >
>> >> >> > Hmm, we might have a DMI containing a call with an argument referring
>> >> >> > to *this, i.e.
>> >> >> >
>> >> >> > struct A
>> >> >> > {
>> >> >> >   int i;
>> >> >> >   int j = frob (this->i);
>> >> >> > };
>> >> >> >
>> >> >> > The TARGET_EXPR seems like a more likely barrier, but even there we
>> >> >> > could have something like
>> >> >> >
>> >> >> > struct A { int i; };
>> >> >> > struct B
>> >> >> > {
>> >> >> >   int i;
>> >> >> >   A a = A{this->i};
>> >> >> > };
>> >> >> >
>> >> >> > I think we need replace_placeholders to keep a stack of objects, so
>> >> >> > that when we see a TARGET_EXPR we add it to the stack and therefore
>> >> >> > can properly replace a PLACEHOLDER_EXPR of its type.
>> >> >>
>> >> >> Or actually, avoid replacing such a PLACEHOLDER_EXPR, but rather leave
>> >> >> it for later when we lower the TARGET_EXPR.
>> >> >
>> >> > Sorry, I don't really follow.  I have a patch that puts TARGET_EXPRs on
>> >> > a stack, but I don't know how that helps.  E.g. with nsdmi-aggr3.C
>> >> > we have
>> >> > B b = TARGET_EXPR > >> > &>}>
>> >> > so when we get to that PLACEHOLDER_EXPR, on the stack there's
>> >> > TARGET_EXPR with type struct A
>> >> > TARGET_EXPR with type struct B
>> >> > so the type of the PLACEHOLDER_EXPR doesn't match the type of the 
>> >> > current
>> >> > TARGET_EXPR, but we still want to replace it in this case.
>> >> >
>> >> > So -- could you expand a bit on what you had in mind, please?
>> >>
>> >> So then when we see a placeholder, we walk the stack to find the
>> >> object of the matching type.
>> >>
>> >> But if the object we find was collected from walking through a
>> >> TARGET_EXPR, we should leave the PLACEHOLDER_EXPR alone, so that it
>> >> can be replaced later with the actual target of the initialization.
>> >
>> > Unfortunately, I still don't understand; guess I'll have to drop this PR.
>> >
>> > With this we put TARGET_EXPRs on a stack, and then when we find a
>> > PLACEHOLDER_EXPR we walk the stack to find a TARGET_EXPR of the same type 
>> > as
>> > the PLACEHOLDER_EXPR.  There are three simplified examples I've been 
>> > playing
>> > with:
>> >
>> >   B b = T_E >}>
>> >
>> >   - here we should replace the P_E; on the stack there are two
>> > TARGET_EXPRs of types B and A
>> >
>> >   C c = T_E >)>
>> >
>> >   - here we shouldn't replace the P_E; on the stack there are two
>> > TARGET_EXPRs of types X and C
>> >
>> >   B b = T_E }}>
>> >
>> >   - here we should replace the P_E; on the stack there's one TARGET_EXPR
>> > of type B
>> >
>> > In each case we find a TARGET_EXPR of the type of the PLACEHOLDER_EXPR, 
>> > but I
>> > don't see how to decide which PLACEHOLDER_EXPR we should let slide.  Sorry 
>> > for
>> > being dense...
>>
>> I was thinking that we want to replace the type of the first entry in
>> the stack (B, C, B respectively), and leave others alone.
>
> Even that didn't work for me, because we may end up with a case of
>
>   C c = bar (T_E >)

Why is that a problem?

Your patch doesn't fix this variant:

struct X {
  unsigned i;
  unsigned n = i;
};

unsigned int
bar (X x)
{
  return x.n;
}

int main()
{
  X x = { 2, bar (X{1}) };
  if (x.n != 1)
__builtin_abort ();
}

Here we have two X objects, and we need to recognize them as distinct.

Jason


Re: [PATCH 1/7] enhance -Wformat to detect quoting problems (PR 80280 et al.)

2017-05-03 Thread Martin Sebor

On 05/03/2017 09:53 AM, Joseph Myers wrote:

On Tue, 2 May 2017, Martin Sebor wrote:


In bug 80280 - Missing closing quote (%>) c/semantics.c and
c/c-typeck.c, a translator points out one of a number of kinds
of cosmetic problems that tend to come up late in development,
during translation of GCC messages.  Other, arguably more minor,
kinds of issues are caused by forgetting to use proper quoting
when referencing tree nodes, such as %D or %T.

To help avoid these kinds of bugs, the attached patch enhances
the -Wformat checker to detect these common quoting issues and
report them as warnings.


As I see it, there are four kinds of format specifiers in this context:

* Specifiers that have no special interaction with quoting and may be used
either quoted or unquoted.  For example, %d, %s and %E.  (%E is only of
that kind when used for expressions - when hopefully it should only be
used for simple expressions such as constants.  Uses for identifiers
should be quoted; cases where there can be complex expressions should be
replaced by use of location ranges.  Ideally identifiers and expressions
should have different formats, so these cases can be distinguished and so
identifiers can end up with a static type other than "tree".)

* Specifiers that should be quoted.  For example, %D.

* Specifiers that open quoting (%<).

* Specifiers that close quoting (%>).

(In addition, the q flag serves to quote the specifier it's applied to.)


Thanks for comments.  That above matches my view.




The remaining six patches in the series correct the problems
highlighted by the warning and get GCC to bootstrap and pass
regression tests on x86_64.  I suspect additional fixes will
be needed to get GCC to bootstrap on other targets.  I'll do
powerpc64le next but besides a general review I'm looking for
suggestions how to do the same cleanup on all the other targets
with the least disruption. (E.g., if there's a way to roll out
a warning one target at a time I could introduce it under
a suboption of -Wformat and enable the subobption with -Werror
only on the targets that have already been verified.)


Use contrib/config-list.mk, with a native compiler with this patch in the
PATH, to test building compilers for many configurations.  (No doubt
you'll also find existing build issues, which may or may not be filed in
Bugzilla.)


I can do some of it but not all of it.  The work doesn't involve
just building the compiler but also running the tests and fixing
up regressions in those that are written to expect the unquoted
diagnostics.  I don't have the ability to run the test suite on
all the targets, or the bandwidth to run it on a subset of them
beyond powerpc64 and x86_64.  So I'm hoping to find a way for
the core of the patch to be checked in and for the cleanup to
proceed subsequently, as target maintainers find time.




+  /* Diagnose nested or unmatched quoting directives such as GCC's
+"%<...%<" and "%>...%>".  As a special case, a directive can
+be considered to both begin and end quoting (e.g., GCC's %E).
+Such a directive is recognized but not diagosed.  */


I don't think it makes conceptual sense to consider %E to both begin and
end quoting.  I'd expect %E to have exactly the same begin_quote and
end_quote (or whatever) data as %d and %s, because it has the same
properties (may be used quoted or unquoted).


Yes, that's exactly how %E is treated by the code.  The special
case the comment above refers to is meant to explain how the
begin_quote and end_quote local variables get set, not describe
a concept in the design.  I renamed the variables so as not to
suggest otherwise.




diff --git a/gcc/c-family/c-format.h b/gcc/c-family/c-format.h
index 13ca8ea..8932861 100644
--- a/gcc/c-family/c-format.h
+++ b/gcc/c-family/c-format.h
@@ -132,6 +132,11 @@ struct format_type_detail
 struct format_char_info
 {
   const char *format_chars;
+  /* Strings of FORMAT_CHARS characters that begin and end quoting,
+ respectively, and pairs of which should be matched in the same
+ format string.  NULL when no such pairs exist.  */
+  const char *quote_begin_chars;
+  const char *quote_end_chars;


I don't think this comment is sufficiently detailed to make it obvious
what should appear in each field for each of the four kinds of format
specifiers I enumerated.  The best I can reverse-engineer from the code
is: NULL for specifiers that may be used either quoted or unquoted *or*
listing those specifiers in both quote_begin_chars and quote_end_chars
(but I think the option of listing them in both should be removed as
conceptually wrong); if the character is an opening or closing quote, list
it in the appropriate one; if it must be used quoted, but isn't a quote
itself, both strings must be non-NULL but it must not be listed in either.

Whether that's right or wrong, the comment needs to make the rules clear.


I've clarified the comment.

Thanks
Martin
PR translation/80280 - Missing closing quote (%>) c/s

Re: [PATCH] Fix ICE in modified_type_die (PR debug/80461)

2017-05-03 Thread Jason Merrill
On Wed, Apr 19, 2017 at 12:29 PM, Jeff Law  wrote:
> On 04/19/2017 10:13 AM, Jakub Jelinek wrote:
>>
>> On Wed, Apr 19, 2017 at 09:55:19AM -0600, Jeff Law wrote:
>>>
>>> On 04/19/2017 09:10 AM, Jakub Jelinek wrote:

 On Wed, Apr 19, 2017 at 08:57:52AM -0600, Jeff Law wrote:
>>
>> 2017-04-19  Jakub Jelinek  
>>
>> PR debug/80461
>> * dwarf2out.c (modified_type_die, gen_type_die_with_usage):
>> Check for t with zero TYPE_QUALS_NO_ADDR_SPACE.
>>
>> * g++.dg/debug/pr80461.C: New test.
>
> I'm going to assume your use of TYPE_QUALS_NO_ADDR_SPACE vs TYPE_QUALS
> or
> TYPE_QUALS_NO_ADDR_SPACE_NO_ATOMIC is correct.


 I don't really know.  For address space quals I think one would need to
 have
 pointer-to-members in different code address spaces, not sure if we
 support
 anything like that.  And atomic is C only which doesn't have
 pointer-to-members.
>>>
>>> To put it another way, in your message you indicated that
>>> modified_type_die
>>> expects either an unqualified type or the type itself and that your patch
>>> makes sure we only pick unqualified types.
>>>
>>> Using TYPE_QUALS_NO_ADDR_SPACE like you have seems to conflict with those
>>> statements.  So I'm curious why you allow address space qualifiers to
>>> pass
>>> through, but no others.   It just seems odd.
>>
>>
>> I used TYPE_QUALS_NO_ADDR_SPACE because that seems to be used in
>> modified_type_die elsewhere (dwarf2out.c has only one use of TYPE_QUALS
>> and
>> even that one ignores addr space bits, as it is masked with qual_mask;
>> the rest just TYPE_QUALS_NO_ADDR_SPACE).  I think with FUNCTION/METHOD
>> types
>> with addr space quals we end up in grossly untested territory that most
>> likely will just not work at all.  I think we don't do anything with the
>> address space quals right now, in the future DWARF has segment identifiers
>> and something like that could be emitted, but there needs to be ABI
>> agreement on what it means.
>
> OK.  So let's go with your patch -- my reading of modified_type_die was that
> it would ignore address-space qualifiers.  So I think your patch is safe, it
> was the inconsistency in the message and the patch itself I was most
> concerned about.

BTW, I think it would be a bit more correct to compare the quals with
those of "main" rather than compare them to 0, though I suspect there
aren't currently any types for which that produces a different result.

Jason


Re: [PATCH] [PR79542][Ada] Fix ICE in dwarf2out.c with nested func. inlining

2017-05-03 Thread Jason Merrill
On Tue, Mar 14, 2017 at 8:24 AM, Pierre-Marie de Rodat
 wrote:
> Hello,
>
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79542 reports an ICE in
> dwarf2out.c for an Ada testcase built with optimization.
>
> This crash happens during the late generation pass because
> add_gnat_descriptive_type cannot find the type DIE corresponding to some
> descriptive type after having tried to generate it. This is because the
> DIE was generated during the early generation pass, but then pruned by
> the type pruning machinery. So why was it pruned?
>
> We are in a situation where we have cloned types (because of inlining,
> IIUC) whose TYPE_NAME have non-null DECL_ABSTRACT_ORIGIN attributes. As
> a consequence:
>
>   * In modified_type_die, the "handle C typedef types" part calls
> gen_type_die on the cloned type.
>
>   * gen_type_die matches a typedef variant, and then calls gen_decl_die
> on its TYPE_NAME, which will end up calling gen_typedef_die.
>
>   * gen_typedef_die checks decl_ultimate_origin for this TYPE_DECL, and
> finds one, so it only adds a DW_AT_abstract_origin attribute to the
> DW_TAG_typedef DIE, but the cloned type itself does not get its own
> DIE.

That seems like a bug; if gen_typedef_die is going to generate a DIE
for a cloned typedef, it needs to associate the type with the DIE.

>   * Back in modified_type_die, the call to lookup_type_die on the type
> passed to gen_type_die returns NULL.

> In the end, whole type trees, i.e. the ones referenced by
> DECL_ABSTRACT_ORIGIN attributes, are never referenced from type pruning
> "roots" and are thus pruned. The descriptive type at stake here is one
> of them, hence the assertion failure.
>
> This patch attemps to fix that with what seems to be the most sensible
> thing to do in my opinion: updating the "handle C typedef types" part in
> modified_type_die to check decl_ultimate_origin before calling
> gen_type_die: if that function returns something not null, then we know
> that gen_type_die/gen_typedef_die will not generate a DIE for the input
> type, so we try to process the ultimate origin instead.

This soundsn good; the DWARF standard says that we don't need to have
a die at all for the cloned typedef.

> @@ -12496,6 +12496,18 @@ modified_type_die (tree type, int cv_quals, bool 
> reverse,
>
>if (qualified_type == dtype)
> {
> + tree origin
> +  = TYPE_NAME (qualified_type) == NULL
> +? NULL
> +: decl_ultimate_origin (TYPE_NAME (qualified_type));

This is unnecessarily complicated; at this point we already know that
TYPE_NAME (qualified_type) is non-null and in the variable "name".

> + /* Typedef variants that have an abstract origin don't get their own
> +type DIE (see gen_typedef_die), so fall back on the ultimate

gen_typedef_die does create a DIE for them, it just doesn't do it
properly.  But we could change gen_typedef_die to abort in that case,
making this comment correct.

Jason


Re: [PATCH 1/4][PR tree-optimization/78496] Don't simplify conditionals too early in VRP

2017-05-03 Thread Jeff Law

On 05/03/2017 10:55 AM, Bin.Cheng wrote:

On Wed, May 3, 2017 at 5:32 PM, Jeff Law  wrote:

[ With the patch attached... ]


On 05/03/2017 10:31 AM, Jeff Law wrote:


This is the first of 3-5 patches to address pr78496.

The goal of these patches is to catch jump threads earlier in the pipeline
to avoid undesirable behavior in PRE and more generally be able to exploit
the secondary opportunities exposed by jump threading.

One of the more serious issues I found while investigating 78496 was VRP
failing to find what should have been obvious jump threads.  The fundamental
issue is VRP will simplify conditionals which are fed by a typecast prior to
jump threading.   So something like this:

x = (typecast) y;
if (x == 42)

Can often be transformed into:

if (y == 42)


The problem is any ASSERT_EXPRS after the conditional will reference "x"
rather than "y".  That in turn makes it impossible for VRP to use those
ASSERT_EXPRs to thread later jumps that use x == 


More concretely consider this gimple code:


;;   basic block 5, loop depth 0, count 0, freq 1, maybe hot
;;prev block 4, next block 12, flags: (NEW, REACHABLE, VISITED)
;;pred:   3 [50.0%]  (TRUE_VALUE,EXECUTABLE)
;;4 [100.0%]  (FALLTHRU,EXECUTABLE)
# iftmp.0_2 = PHI <1(3), 0(4)>
in_loop_7 = (unsigned char) iftmp.0_2;
if (in_loop_7 != 0)
  goto ; [33.00%]
else
  goto ; [67.00%]

;;succ:   6 [33.0%]  (TRUE_VALUE,EXECUTABLE)
;;12 [67.0%]  (FALSE_VALUE,EXECUTABLE)

;;   basic block 12, loop depth 0, count 0, freq 6700, maybe hot
;;prev block 5, next block 6, flags: (NEW)
;;pred:   5 [67.0%]  (FALSE_VALUE,EXECUTABLE)
in_loop_15 = ASSERT_EXPR ;
goto ; [100.00%]
;;succ:   7 [100.0%]  (FALLTHRU)

;;   basic block 6, loop depth 0, count 0, freq 3300, maybe hot
;;prev block 12, next block 7, flags: (NEW, REACHABLE, VISITED)
;;pred:   5 [33.0%]  (TRUE_VALUE,EXECUTABLE)
in_loop_14 = ASSERT_EXPR ;
simple_iv ();
;;succ:   7 [100.0%]  (FALLTHRU,EXECUTABLE)

And later we have:

;;   basic block 9, loop depth 0, count 0, freq 8476, maybe hot
;;prev block 8, next block 10, flags: (NEW, REACHABLE, VISITED)
;;pred:   7 [84.8%]  (FALSE_VALUE,EXECUTABLE)
if (in_loop_7 == 0)
  goto ; [36.64%]
else
  goto ; [63.36%]

VRP knows it can replace the uses of in_loop_7 in the conditionals in
blocks 5 and 9 with iftmp.0_2 and happily does so *before* jump threading
(but well after ASSERT_EXPR insertion).

As a result VRP is unable to utilize the ASSERT_EXPRs in blocks 12 and 6
(which reference in_loop_7) to thread the jump at bb9 (which now references
iftmp.0_2).


The cases in pr78496 are slightly more complex, but boil down to the same
core issue -- simplifying the conditional too early.

Thankfully this is easy to fix.  We just split the conditional
simplification into two steps so that the transformation noted above occurs
after jump threading (the other simplifications we want to occur before jump
threading).

This allows VRP1 to pick up 27 missed jump threads in the testcase from
78496.  It could well be enough to address 78496, but since we don't have a
solid description of the desired end result I won't consider 78496 fixed
quite yet as there's significant further improvements we can make.

Bootstrapped and regression tested on x86_64-linux-gnu.  Installing on the
trunk.

Jeff




diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 92a4e395ba8..32cc81978df 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2017-05-03  Jeff Law  
+
+   PR tree-optimization/78496
+   * tree-vrp.c (simplify_cond_using_ranges_1): Renamed
+   from simplify_cond_using_ranges.  Split off code to walk
+   backwards through casts into ...
+   (simplify_cond_using_ranges_2): New function.
+   (simplify_stmt_using_ranges): Call simplify_cond_using_ranges_1.
+   (execute_vrp): After identifying jump threads, call
+   simplify_cond_using_ranges_2.
+
  2017-05-03  Jeff Downs  
 Rainer Orth  

diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 596468d33e9..55a44e4635a 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2017-05-03  Jeff Law  
+
+   PR tree-optimization/78496
+   * gcc.dg/tree-ssa/ssa-thread-15.c: New test.
+
  2017-05-03  Uros Bizjak  

 * g++.dg/lto/pr79671_0.C (foo): Fix asm constraints.
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-15.c
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-15.c
new file mode 100644
index 000..f73268cacbb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-15.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-vrp1" } */
+
+/* We should thread the if (!in_loop) completely leaving
+   just two conditionals.  */
+/* { dg-final { scan-tree-dump-times "if \\(" 2 "vrp1" } } */
+
+
+union tree_node;
+typedef union tree_node *tree;
+
+enum 

[PATCH, rs6000] Avoid vectorizing versioned copy loops with vectorization factor 2

2017-05-03 Thread Bill Schmidt
Hi,

We recently became aware of some poor code generation as a result of
unprofitable (for POWER) loop vectorization.  When a loop is simply copying
data with 64-bit loads and stores, vectorizing with 128-bit loads and stores
generally does not provide any benefit on modern POWER processors.
Furthermore, if there is a requirement to version the loop for aliasing,
alignment, etc., the cost of the versioning test is almost certainly a
performance loss for such loops.  The user code example included such a copy
loop, executed only a few times on average, within an outer loop that was
executed many times on average, causing a tremendous slowdown.

This patch very specifically targets these kinds of loops and no others,
and artificially inflates the vectorization cost to ensure vectorization
does not appear profitable.  This is done within the target model cost
hooks to avoid affecting other targets.  A new test case is included that
demonstrates the refusal to vectorize.

We've done SPEC performance testing to verify that the patch does not
degrade such workloads.  Results were all in the noise range.  The
customer code performance loss was verified to have been reversed.

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no regressions.
Is this ok for trunk?

Thanks,
Bill


[gcc]

2017-05-03  Bill Schmidt  

* config/rs6000/rs6000.c (rs6000_vect_nonmem): New static var.
(rs6000_init_cost): Initialize rs6000_vect_nonmem.
(rs6000_add_stmt_cost): Update rs6000_vect_nonmem.
(rs6000_finish_cost): Avoid vectorizing simple copy loops with
VF=2 that require versioning.

[gcc/testsuite]

2017-05-03  Bill Schmidt  

* gcc.target/powerpc/veresioned-copy-loop.c: New file.


Index: gcc/config/rs6000/rs6000.c
===
--- gcc/config/rs6000/rs6000.c  (revision 247560)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -5873,6 +5873,8 @@ rs6000_density_test (rs6000_cost_data *data)
 
 /* Implement targetm.vectorize.init_cost.  */
 
+static bool rs6000_vect_nonmem;
+
 static void *
 rs6000_init_cost (struct loop *loop_info)
 {
@@ -5881,6 +5883,7 @@ rs6000_init_cost (struct loop *loop_info)
   data->cost[vect_prologue] = 0;
   data->cost[vect_body] = 0;
   data->cost[vect_epilogue] = 0;
+  rs6000_vect_nonmem = false;
   return data;
 }
 
@@ -5907,6 +5910,19 @@ rs6000_add_stmt_cost (void *data, int count, enum
 
   retval = (unsigned) (count * stmt_cost);
   cost_data->cost[where] += retval;
+
+  /* Check whether we're doing something other than just a copy loop.
+Not all such loops may be profitably vectorized; see
+rs6000_finish_cost.  */
+  if ((where == vect_body
+  && (kind == vector_stmt || kind == vec_to_scalar || kind == vec_perm
+  || kind == vec_promote_demote || kind == vec_construct
+  || kind == scalar_to_vec))
+ || (where != vect_body
+ && (kind == vec_to_scalar || kind == vec_perm
+ || kind == vec_promote_demote || kind == vec_construct
+ || kind == scalar_to_vec)))
+   rs6000_vect_nonmem = true;
 }
 
   return retval;
@@ -5923,6 +5939,19 @@ rs6000_finish_cost (void *data, unsigned *prologue
   if (cost_data->loop_info)
 rs6000_density_test (cost_data);
 
+  /* Don't vectorize minimum-vectorization-factor, simple copy loops
+ that require versioning for any reason.  The vectorization is at
+ best a wash inside the loop, and the versioning checks make
+ profitability highly unlikely and potentially quite harmful.  */
+  if (cost_data->loop_info)
+{
+  loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
+  if (!rs6000_vect_nonmem
+ && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
+ && LOOP_REQUIRES_VERSIONING (vec_info))
+   cost_data->cost[vect_body] += 1;
+}
+
   *prologue_cost = cost_data->cost[vect_prologue];
   *body_cost = cost_data->cost[vect_body];
   *epilogue_cost = cost_data->cost[vect_epilogue];
Index: gcc/testsuite/gcc.target/powerpc/versioned-copy-loop.c
===
--- gcc/testsuite/gcc.target/powerpc/versioned-copy-loop.c  (nonexistent)
+++ gcc/testsuite/gcc.target/powerpc/versioned-copy-loop.c  (working copy)
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+
+/* Verify that a pure copy loop with a vectorization factor of two
+   that requires alignment will not be vectorized.  See the cost
+   model hooks in rs6000.c.  */
+
+typedef long unsigned int size_t;
+typedef unsigned char uint8_t;
+
+extern void *memcpy (void *__restrict __dest, const void *__restrict __src,
+   size_t __n) __attribute__ ((__nothrow__ , __leaf__)) __attribute__ 
((__nonnull__ (1, 2)));
+
+void foo (void *dstPtr, const void *srcPtr, void *

Re: [PATCH 1/7] enhance -Wformat to detect quoting problems (PR 80280 et al.)

2017-05-03 Thread Jeff Law

On 05/03/2017 12:47 PM, Martin Sebor wrote:



I can do some of it but not all of it.  The work doesn't involve
just building the compiler but also running the tests and fixing
up regressions in those that are written to expect the unquoted
diagnostics.  I don't have the ability to run the test suite on
all the targets, or the bandwidth to run it on a subset of them
beyond powerpc64 and x86_64.  So I'm hoping to find a way for
the core of the patch to be checked in and for the cleanup to
proceed subsequently, as target maintainers find time.
Just a note.  It's not terrible to run non-execute tests with cross 
tools.   In fact, it's fairly easy.  I'm happy to walk you through the 
process.


Long term I think we want to move away from config-list.mk and instead 
use a buildbot.  I've been experimenting with jenkins.I've got a 
series of jobs that will build the *-elf, *-rtems, *-gnu and *-linux 
targets.  binutils, then gcc, then newlib/glibc.


The newlib (*-elf and *-rtems) pipeline is pretty solid at this point 
and covers ~70 targets from config-list.mk building through newlib.


The glibc pipeline is more problematic, but improving.

It's about ready to leave my little cluster of slaves and relocate to 
the servers in Toronto.


It should be fairly straightforward to take that jenkins work and tie in 
testsuite runs.  Even if we just start with compile.exp and expand one 
day (via the simulators) to the execution tests.


Jeff


Re: [PATCH 1/7] enhance -Wformat to detect quoting problems (PR 80280 et al.)

2017-05-03 Thread Joseph Myers
On Wed, 3 May 2017, Martin Sebor wrote:

> > Use contrib/config-list.mk, with a native compiler with this patch in the
> > PATH, to test building compilers for many configurations.  (No doubt
> > you'll also find existing build issues, which may or may not be filed in
> > Bugzilla.)
> 
> I can do some of it but not all of it.  The work doesn't involve
> just building the compiler but also running the tests and fixing
> up regressions in those that are written to expect the unquoted
> diagnostics.  I don't have the ability to run the test suite on
> all the targets, or the bandwidth to run it on a subset of them
> beyond powerpc64 and x86_64.  So I'm hoping to find a way for
> the core of the patch to be checked in and for the cleanup to
> proceed subsequently, as target maintainers find time.

When it's architecture-specific tests, I think it's up to people testing 
on those architectures to fix them.

> > > diff --git a/gcc/c-family/c-format.h b/gcc/c-family/c-format.h
> > > index 13ca8ea..8932861 100644
> > > --- a/gcc/c-family/c-format.h
> > > +++ b/gcc/c-family/c-format.h
> > > @@ -132,6 +132,11 @@ struct format_type_detail
> > >  struct format_char_info
> > >  {
> > >const char *format_chars;
> > > +  /* Strings of FORMAT_CHARS characters that begin and end quoting,
> > > + respectively, and pairs of which should be matched in the same
> > > + format string.  NULL when no such pairs exist.  */
> > > +  const char *quote_begin_chars;
> > > +  const char *quote_end_chars;
> > 
> > I don't think this comment is sufficiently detailed to make it obvious
> > what should appear in each field for each of the four kinds of format
> > specifiers I enumerated.  The best I can reverse-engineer from the code
> > is: NULL for specifiers that may be used either quoted or unquoted *or*
> > listing those specifiers in both quote_begin_chars and quote_end_chars
> > (but I think the option of listing them in both should be removed as
> > conceptually wrong); if the character is an opening or closing quote, list
> > it in the appropriate one; if it must be used quoted, but isn't a quote
> > itself, both strings must be non-NULL but it must not be listed in either.
> > 
> > Whether that's right or wrong, the comment needs to make the rules clear.
> 
> I've clarified the comment.

Clarifying the comment is helpful, but a data structure involving putting 
the same character in both still doesn't make sense to me.  It would seem 
a lot clearer to (for example) split "DFKTEV" into separate "DFTV" and 
"EK" cases, where "EK" uses NULL there just like "s", "d" etc. do.

-- 
Joseph S. Myers
jos...@codesourcery.com


New German PO file for 'gcc' (version 7.1.0)

2017-05-03 Thread Translation Project Robot
Hello, gentle maintainer.

This is a message from the Translation Project robot.

A revised PO file for textual domain 'gcc' has been submitted
by the German team of translators.  The file is available at:

http://translationproject.org/latest/gcc/de.po

(This file, 'gcc-7.1.0.de.po', has just now been sent to you in
a separate email.)

All other PO files for your package are available in:

http://translationproject.org/latest/gcc/

Please consider including all of these in your next release, whether
official or a pretest.

Whenever you have a new distribution with a new version number ready,
containing a newer POT file, please send the URL of that distribution
tarball to the address below.  The tarball may be just a pretest or a
snapshot, it does not even have to compile.  It is just used by the
translators when they need some extra translation context.

The following HTML page has been updated:

http://translationproject.org/domain/gcc.html

If any question arises, please contact the translation coordinator.

Thank you for all your work,

The Translation Project robot, in the
name of your translation coordinator.




Re: [PATCH 1/7] enhance -Wformat to detect quoting problems (PR 80280 et al.)

2017-05-03 Thread Martin Sebor

On 05/03/2017 01:59 PM, Joseph Myers wrote:

On Wed, 3 May 2017, Martin Sebor wrote:


Use contrib/config-list.mk, with a native compiler with this patch in the
PATH, to test building compilers for many configurations.  (No doubt
you'll also find existing build issues, which may or may not be filed in
Bugzilla.)


I can do some of it but not all of it.  The work doesn't involve
just building the compiler but also running the tests and fixing
up regressions in those that are written to expect the unquoted
diagnostics.  I don't have the ability to run the test suite on
all the targets, or the bandwidth to run it on a subset of them
beyond powerpc64 and x86_64.  So I'm hoping to find a way for
the core of the patch to be checked in and for the cleanup to
proceed subsequently, as target maintainers find time.


When it's architecture-specific tests, I think it's up to people testing
on those architectures to fix them.


diff --git a/gcc/c-family/c-format.h b/gcc/c-family/c-format.h
index 13ca8ea..8932861 100644
--- a/gcc/c-family/c-format.h
+++ b/gcc/c-family/c-format.h
@@ -132,6 +132,11 @@ struct format_type_detail
 struct format_char_info
 {
   const char *format_chars;
+  /* Strings of FORMAT_CHARS characters that begin and end quoting,
+ respectively, and pairs of which should be matched in the same
+ format string.  NULL when no such pairs exist.  */
+  const char *quote_begin_chars;
+  const char *quote_end_chars;


I don't think this comment is sufficiently detailed to make it obvious
what should appear in each field for each of the four kinds of format
specifiers I enumerated.  The best I can reverse-engineer from the code
is: NULL for specifiers that may be used either quoted or unquoted *or*
listing those specifiers in both quote_begin_chars and quote_end_chars
(but I think the option of listing them in both should be removed as
conceptually wrong); if the character is an opening or closing quote, list
it in the appropriate one; if it must be used quoted, but isn't a quote
itself, both strings must be non-NULL but it must not be listed in either.

Whether that's right or wrong, the comment needs to make the rules clear.


I've clarified the comment.


Clarifying the comment is helpful, but a data structure involving putting
the same character in both still doesn't make sense to me.  It would seem
a lot clearer to (for example) split "DFKTEV" into separate "DFTV" and
"EK" cases, where "EK" uses NULL there just like "s", "d" etc. do.


Then the begin/end strings for the "DFTV" entry will be the empty
string (to indicate that they are expected to be quoted), as in
the attached incremental diff.  Let me know if I misunderstood
and you had something else in mind.

FWIW, I don't mind doing this way if you prefer, but I'm hard
pressed to see the improvement.  All it did is grow the size of
the tables.  The code stayed the same.

Martin

diff --git a/gcc/c-family/c-format.c b/gcc/c-family/c-format.c
index f64b6e0..ad56835 100644
--- a/gcc/c-family/c-format.c
+++ b/gcc/c-family/c-format.c
@@ -688,7 +688,8 @@ static const format_char_info gcc_diag_char_table[] =
   { "K", NULL, NULL,   0, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q","",   NULL },
 
   { "r", NULL, NULL,   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "","cR",   NULL },
-  { "<>'R","<'R", ">'R", 0, STD_C89, NOARGUMENTS, "",  "",   NULL },
+  { "<>","<", ">", 0, STD_C89, NOARGUMENTS, "",  "",   NULL },
+  { "'R", NULL, NULL,  0, STD_C89, NOARGUMENTS, "",  "",   NULL },
   { "m", NULL, NULL,   0, STD_C89, NOARGUMENTS, "q", "",   NULL },
   { NULL, NULL, NULL,  0, STD_C89, NOLENGTHS, NULL, NULL, NULL }
 };
@@ -706,12 +707,13 @@ static const format_char_info gcc_tdiag_char_table[] =
   /* Custom conversion specifiers.  */
 
   /* These will require a "tree" at runtime.  */
-  { "DFKTEV", "EK", "EK", 0, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "",   NULL },
-
+  { "DFTV", "", "", 0, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "",   NULL },
+  { "EK", NULL, NULL, 0, STD_C89, { T89_V,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q+", "",   NULL },
   { "v", NULL, NULL,   0, STD_C89, { T89_I,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "q#",  "",   NULL },
 
   { "r", NULL, NULL,   1, STD_C89, { T89_C,   BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN,  BADLEN  }, "","cR",   NULL },
-  { "<>'R", "<'R", ">'R", 0, STD_C89, NOARGUMENTS, "",  "",   NULL },
+  { "<>", "<", ">",0, STD_C89, NOARGUMENTS, "",  "",   NULL },
+  { "'R", NULL, NULL,  0, STD_C89, NOARGUMENTS, "",  "",   NULL },
   { "m", NULL, NU

Re: [PATCH 1/7] enhance -Wformat to detect quoting problems (PR 80280 et al.)

2017-05-03 Thread Joseph Myers
On Wed, 3 May 2017, Martin Sebor wrote:

> > Clarifying the comment is helpful, but a data structure involving putting
> > the same character in both still doesn't make sense to me.  It would seem
> > a lot clearer to (for example) split "DFKTEV" into separate "DFTV" and
> > "EK" cases, where "EK" uses NULL there just like "s", "d" etc. do.
> 
> Then the begin/end strings for the "DFTV" entry will be the empty
> string (to indicate that they are expected to be quoted), as in
> the attached incremental diff.  Let me know if I misunderstood
> and you had something else in mind.

Yes, that's what I'd expect (incrementally).

> FWIW, I don't mind doing this way if you prefer, but I'm hard
> pressed to see the improvement.  All it did is grow the size of
> the tables.  The code stayed the same.

Really I think it might be better not to have pointers / strings there at 
all - rather, have a four-state enum value that says directly whether 
those format specifiers are quote-neutral, should-be-quoted, left-quote or 
right-quote.  Or that information could go in the existing flags2 field, 
'"' to mean should-be-quoted, '<' to mean left-quote and '>' to mean 
right-quote, for example.

-- 
Joseph S. Myers
jos...@codesourcery.com


[gomp4] Don't mark OpenACC auto loops as independent inside acc parallel regions

2017-05-03 Thread Cesar Philippidis
The OpenACC 2.5 spec updated the behavior of acc loops inside acc
parallel regions such that loop with seq and auto clauses are not
implicitly independent. Back in OpenACC 2.0, all loops inside acc
parallel regions were implicitly independent. Oddly enough, if the user
just places an acc loop without any clauses, it is still implicitly
independent. E.g.

  #pragma acc loop

implies

  #pragma acc loop independent

which is not equal to

  #pragma acc loop auto

I suppose the auto flag is used to explicitly have the compiler
"automatically" detect loop dependencies and partition the loop
accordingly. This patch, which I've applied to gomp-4_0-branch makes GCC
comply with this new behavior.

Cesar
2017-05-03  Cesar Philippidis  

	gcc/
	* omp-low.c (lower_oacc_head_mark): Don't mark OpenACC auto loops as
	independent inside acc parallel regions.

	gcc/testsuite/
	* c-c++-common/goacc/loop-auto-1.c: Adjust test case to conform to
	the new behavior of the auto clause in OpenACC 2.5.
	* c-c++-common/goacc/loop-auto-2.c: Likewise.
	* gcc.dg/goacc/loop-processing-1.c: Likewise.
	* c-c++-common/goacc/loop-auto-3.c: New test.
	* gfortran.dg/goacc/loop-auto-1.f90: New test.

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Adjust test case
	to conform to the new behavior of the auto clause in OpenACC 2.5.


diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index cf299c12..9e9a363 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -6638,9 +6638,10 @@ lower_oacc_head_mark (location_t loc, tree ddvar, tree clauses,
   tag |= OLF_GANG_STATIC;
 }
 
-  /* In a parallel region, loops are implicitly INDEPENDENT.  */
+  /* In a parallel region, loops without auto and seq clauses are
+ implicitly INDEPENDENT.  */
   omp_context *tgt = enclosing_target_ctx (ctx);
-  if (!tgt || is_oacc_parallel (tgt))
+  if ((!tgt || is_oacc_parallel (tgt)) && !(tag & (OLF_SEQ | OLF_AUTO)))
 tag |= OLF_INDEPENDENT;
 
   if (tag & OLF_TILE)
diff --git a/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c b/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c
index 124befc..dcad07f 100644
--- a/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c
+++ b/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c
@@ -10,7 +10,7 @@ void Foo ()
 #pragma acc loop seq
 	for (int jx = 0; jx < 10; jx++) {}
 
-#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */
 	for (int jx = 0; jx < 10; jx++) {}
   }
 
@@ -20,7 +20,7 @@ void Foo ()
 #pragma acc loop auto
 	for (int jx = 0; jx < 10; jx++) {}
 
-#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */
 	for (int jx = 0; jx < 10; jx++)
 	  {
 #pragma acc loop vector
@@ -51,7 +51,7 @@ void Foo ()
 #pragma acc loop vector
 	for (int jx = 0; jx < 10; jx++)
 	  {
-#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */
 	for (int kx = 0; kx < 10; kx++) {}
 	  }
 
@@ -64,27 +64,27 @@ void Foo ()
 
   }
 
-#pragma acc loop auto
+#pragma acc loop auto independent
 for (int ix = 0; ix < 10; ix++)
   {
-#pragma acc loop auto
+#pragma acc loop auto independent
 	for (int jx = 0; jx < 10; jx++)
 	  {
-#pragma acc loop auto
+#pragma acc loop auto independent
 	for (int kx = 0; kx < 10; kx++) {}
 	  }
   }
 
-#pragma acc loop auto
+#pragma acc loop auto independent
 for (int ix = 0; ix < 10; ix++)
   {
-#pragma acc loop auto
+#pragma acc loop auto independent
 	for (int jx = 0; jx < 10; jx++)
 	  {
-#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */
 	for (int kx = 0; kx < 10; kx++)
 	  {
-#pragma acc loop auto
+#pragma acc loop auto independent
 		for (int lx = 0; lx < 10; lx++) {}
 	  }
 	  }
@@ -101,7 +101,7 @@ void Gang (void)
 #pragma acc loop seq
 	for (int jx = 0; jx < 10; jx++) {}
 
-#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */
 	for (int jx = 0; jx < 10; jx++) {}
   }
 
@@ -111,7 +111,7 @@ void Gang (void)
 #pragma acc loop auto
 	for (int jx = 0; jx < 10; jx++) {}
 
-#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */
 	for (int jx = 0; jx < 10; jx++)
 	  {
 #pragma acc loop vector
@@ -142,7 +142,7 @@ void Gang (void)
 #pragma acc loop vector
 	for (int jx = 0; jx < 10; jx++)
 	  {
-#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
+#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */
 	for (int kx = 0; kx < 10; kx++) {}
 	  }
 
@@ -176,7 +176,7 @@ void Worker (void)
 #pragma acc loop seq
 	for (int j

Re: [wwwdocs, ARM, AArch64] Document ABI changes and fixes

2017-05-03 Thread Gerald Pfeifer
On Tue, 2 May 2017, Richard Earnshaw (lists) wrote:
> This patch adds some release notes for the gcc ABI changes affecting ARM 
> and AArch64.

This looks fine, thank you.

The one thing you may want to look into is where it says "code 
where class objects are passed by value to functions" which feels
a bit odd.  Perhaps "...passed to functions by value" or just omit
"to functions" given how common of a phrase "passing by value" is?

Gerald


[PATCH] Fix -fopt-info documentation in invoke.texi

2017-05-03 Thread Steve Ellcey
The description of the default behavour of -fopt-info in invoke.texi is
wrong.  This patch fixes it.  I also added a sentence to explicitly say
what is implied by the note that -fopt-info-vec-missed is the same as
-fopt-info-missed-vec.  Namely, that order doesn't matter.

OK to checkin?

Steve Ellcey
sell...@cavium.com


2017-05-03  Steve Ellcey  

* doc/invoke.texi (-fopt-info): Fix description of default
behavour. Explicitly say order of options included in -fopt-info
does not matter.


diff --git a/gcc/doc/optinfo.texi b/gcc/doc/optinfo.texi
index e17cb37..f4158a0 100644
--- a/gcc/doc/optinfo.texi
+++ b/gcc/doc/optinfo.texi
@@ -208,16 +208,17 @@ optimized locations from all the inlining passes into
 If the @var{filename} is provided, then the dumps from all the
 applicable optimizations are concatenated into the @file{filename}.
 Otherwise the dump is output onto @file{stderr}. If @var{options} is
-omitted, it defaults to @option{all-all}, which means dump all
-available optimization info from all the passes. In the following
-example, all optimization info is output on to @file{stderr}.
+omitted, it defaults to @option{optimized-optall}, which means dump
+information about successfully applied optimizations from all the passes.
+In the following example, the optimization info is output on to @file{stderr}.
 
 @smallexample
 gcc -O3 -fopt-info
 @end smallexample
 
 Note that @option{-fopt-info-vec-missed} behaves the same as
-@option{-fopt-info-missed-vec}.
+@option{-fopt-info-missed-vec}.  The order of the optimization group
+names and message types listed after @option{-fopt-info} does not matter.
 
 As another example, consider


Re: [testsuite, committed] Replace absolute line numbers in c-c++-common

2017-05-03 Thread Mike Stump
On May 3, 2017, at 12:39 AM, Tom de Vries  wrote:
> diff --git a/gcc/testsuite/c-c++-common/Wshift-negative-value-1.c 
> b/gcc/testsuite/c-c++-common/Wshift-negative-value-1.c
> index 8f14034..7df1804 100644
> --- a/gcc/testsuite/c-c++-common/Wshift-negative-value-1.c
> +++ b/gcc/testsuite/c-c++-common/Wshift-negative-value-1.c
> @@ -7,6 +7,7 @@ enum E {
>   A = 0 << 1,
>   B = 1 << 1,
>   C = -1 << 1, /* { dg-warning "left shift of negative value|not an integer 
> constant" } */
> +  /* { dg-error "left operand of shift expression" "shift" { target c++ } 
> .-1 } */
>   D = 0 >> 1,
>   E = 1 >> 1,
>   F = -1 >> 1
> @@ -47,5 +48,3 @@ right (int x)
>   r += -1U >> x;
>   return r;
> }
> -
> -/* { dg-error "left operand of shift expression" "shift" { target c++ } 9 } 
> */

I like this type of change, thanks.

Cap niter_for_unrolled_loop to upper bound

2017-05-03 Thread Richard Sandiford
For the reasons explained in PR77536, niter_for_unrolled_loop assumes 5
iterations in the absence of profiling information, although it doesn't
increase beyond the estimate for the original loop.  This left a hole in
which the new estimate could be less than the old one but still greater
than the limit imposed by CEIL (nb_iterations_upper_bound, unroll factor).

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Thanks,
Richard


gcc/
2017-05-04  Richard Sandiford  

* tree-ssa-loop-manip.c (niter_for_unrolled_loop): Add commentary
to explain the use of truncating division.  Cap the number of
iterations to the maximum given by nb_iterations_upper_bound,
if defined.

gcc/testsuite/
* gcc.dg/vect/vect-profile-1.c: New test.

Index: gcc/tree-ssa-loop-manip.c
===
--- gcc/tree-ssa-loop-manip.c   2017-05-03 08:46:26.068861808 +0100
+++ gcc/tree-ssa-loop-manip.c   2017-05-04 07:41:56.686034705 +0100
@@ -1104,6 +1104,9 @@ niter_for_unrolled_loop (struct loop *lo
   gcc_assert (factor != 0);
   bool profile_p = false;
   gcov_type est_niter = expected_loop_iterations_unbounded (loop, &profile_p);
+  /* Note that this is really CEIL (est_niter + 1, factor) - 1, where the
+ "+ 1" converts latch iterations to loop iterations and the "- 1"
+ converts back.  */
   gcov_type new_est_niter = est_niter / factor;
 
   /* Without profile feedback, loops for which we do not know a better estimate
@@ -1120,6 +1123,15 @@ niter_for_unrolled_loop (struct loop *lo
new_est_niter = 5;
 }
 
+  if (loop->any_upper_bound)
+{
+  /* As above, this is really CEIL (upper_bound + 1, factor) - 1.  */
+  widest_int bound = wi::udiv_floor (loop->nb_iterations_upper_bound,
+factor);
+  if (wi::ltu_p (bound, new_est_niter))
+   new_est_niter = bound.to_uhwi ();
+}
+
   return new_est_niter;
 }
 
Index: gcc/testsuite/gcc.dg/vect/vect-profile-1.c
===
--- /dev/null   2017-05-04 07:24:39.449302696 +0100
+++ gcc/testsuite/gcc.dg/vect/vect-profile-1.c  2017-05-04 07:41:56.685075916 
+0100
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-fdump-tree-vect-details-blocks" } */
+
+/* At least one of these should correspond to a full vector.  */
+
+void
+f1 (int *x)
+{
+  for (int j = 0; j < 2; ++j)
+x[j] += 1;
+}
+
+void
+f2 (int *x)
+{
+  for (int j = 0; j < 4; ++j)
+x[j] += 1;
+}
+
+void
+f3 (int *x)
+{
+  for (int j = 0; j < 8; ++j)
+x[j] += 1;
+}
+
+void
+f4 (int *x)
+{
+  for (int j = 0; j < 16; ++j)
+x[j] += 1;
+}
+
+/* { dg-final { scan-tree-dump {goto ; \[0+.0*%\]} vect } } */


Misc. BRIG/HSAIL FE updates

2017-05-03 Thread Pekka Jääskeläinen
Hi,

In r247576 I committed the patch below, which has minor updates and
fixes to the BRIG/HSAIL frontend.


Index: gcc/brig/brigfrontend/brig-code-entry-handler.cc
===
--- gcc/brig/brigfrontend/brig-code-entry-handler.cc (revision 247575)
+++ gcc/brig/brigfrontend/brig-code-entry-handler.cc (revision 247576)
@@ -464,7 +464,24 @@
   uint64_t offs = gccbrig_to_uint64_t (addr_operand.offset);
   if (offs > 0 || addr == NULL_TREE)
 {
-  tree const_offset_2 = build_int_cst (size_type_node, offs);
+  /* In large mode, the offset is treated as 32bits unless it's
+ global, readonly or kernarg address space.
+ See:
+ http://www.hsafoundation.com/html_spec111/HSA_Library.htm
+ #PRM/Topics/02_ProgModel/small_and_large_machine_models.htm
+ #table_machine_model_data_sizes */
+
+  int is64b_offset = segment == BRIG_SEGMENT_GLOBAL
+ || segment == BRIG_SEGMENT_READONLY
+ || segment == BRIG_SEGMENT_KERNARG;
+
+  /* The original offset is signed and should be sign
+ extended for the pointer arithmetics.  */
+  tree const_offset_2 = is64b_offset
+? build_int_cst (size_type_node, offs)
+: convert (long_integer_type_node,
+   build_int_cst (integer_type_node, offs));
+
   if (addr == NULL_TREE)
  addr = const_offset_2;
   else
@@ -1265,6 +1282,10 @@
   operand_type = uint32_type_node;
   half_to_float = false;
  }
+  else if (brig_inst.opcode == BRIG_OPCODE_ACTIVELANEPERMUTE && i == 4)
+ {
+  operand_type = uint32_type_node;
+ }
   else if (half_to_float)
  /* Treat the operands as the storage type at this point.  */
  operand_type = half_storage_type;
Index: gcc/brig/ChangeLog
===
--- gcc/brig/ChangeLog (revision 247575)
+++ gcc/brig/ChangeLog (revision 247576)
@@ -1,3 +1,11 @@
+2017-05-03  Pekka Jääskeläinen  
+
+ * brigfrontend/brig-code-entry-handler.cc
+ (brig_code_entry_handler::build_address_operand): Fix a bug
+ with reg+offset addressing on 32b segments. In large mode,
+ the offset is treated as 32bits unless it's global, readonly or
+ kernarg address space.
+
 2016-02-01  Pekka Jääskeläinen  

  * brigfrontend/brig-code-entry-handler.cc: fix address
Index: gcc/ChangeLog
===
--- gcc/ChangeLog (revision 247575)
+++ gcc/ChangeLog (revision 247576)
@@ -1,3 +1,8 @@
+2017-05-04  Pekka Jääskeläinen  
+
+ * brig-builtins.def: Added a builtin for class_f64.
+ * builtin-types.def: Added a builtin type needed by class_f64.
+
 2017-05-03  Jason Merrill  

  * timevar.def: Add TV_CONSTEXPR.
@@ -71,6 +76,7 @@
  * ipa-inline.h (inline_summary): Add ctor.
  (create_ggc): Do not use ggc_cleared_alloc.

+>>> .r247575
^--- I removed this in the next commit.

 2017-05-03  Jeff Downs  
 Rainer Orth  

Index: gcc/builtin-types.def
===
--- gcc/builtin-types.def (revision 247575)
+++ gcc/builtin-types.def (revision 247576)
@@ -348,6 +348,8 @@
  BT_INT, BT_INT, BT_INT)
 DEF_FUNCTION_TYPE_2 (BT_FN_UINT_FLOAT_UINT,
  BT_UINT, BT_FLOAT, BT_UINT)
+DEF_FUNCTION_TYPE_2 (BT_FN_UINT_DOUBLE_UINT,
+ BT_UINT, BT_DOUBLE, BT_UINT)
 DEF_FUNCTION_TYPE_2 (BT_FN_FLOAT_UINT_UINT,
  BT_FLOAT, BT_UINT, BT_UINT)
 DEF_FUNCTION_TYPE_2 (BT_FN_ULONG_UINT_UINT,
Index: gcc/brig-builtins.def
===
--- gcc/brig-builtins.def (revision 247575)
+++ gcc/brig-builtins.def (revision 247576)
@@ -222,6 +222,10 @@
   BRIG_TYPE_F32, "__hsail_class_f32", BT_FN_UINT_FLOAT_UINT,
   ATTR_PURE_NOTHROW_LEAF_LIST)

+DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_CLASS_F64, BRIG_OPCODE_CLASS,
+  BRIG_TYPE_F64, "__hsail_class_f64", BT_FN_UINT_DOUBLE_UINT,
+  ATTR_PURE_NOTHROW_LEAF_LIST)
+
 DEF_HSAIL_BUILTIN (BUILT_IN_HSAIL_CLASS_F32_F16, BRIG_OPCODE_CLASS,
   BRIG_TYPE_F16, "__hsail_class_f32_f16", BT_FN_UINT_FLOAT_UINT,
   ATTR_PURE_NOTHROW_LEAF_LIST)
Index: libhsail-rt/rt/workitems.c
===
--- libhsail-rt/rt/workitems.c (revision 247575)
+++ libhsail-rt/rt/workitems.c (revision 247576)
@@ -63,10 +63,6 @@
 #define FIBER_STACK_SIZE (64*1024)
 #define GROUP_SEGMENT_ALIGN 256

-/* HSA requires WGs to be executed in flat work-group id order.  Enabling
-   the following macro can reveal test cases that rely on the ordering,
-   but is not useful for much else.  */
-
 uint32_t __hsail_workitemabsid (uint32_t dim, PHSAWorkItem *context);

 uint32_t __hsail_workitemid (uint32_t dim, PHSAWorkItem *context);
Index: libhsail-rt/rt/arithmetic.c
===
--- libhsail-rt/rt/arithmetic.c (revision 247575)
+++ libhsail-rt/rt/arithmetic.c (revision 247576)
@@ -424,18 +424,34 @@
 uint32_t
 __hsail_class_f32 (float a, uint32_t flags)
 {
-  return (flags & 0x0001 && isnan (a) && !(*(uint32_t *)