date:20180406

[Committed 0/3] IBM Z testsuite fixes

2018-04-06 Thread Andreas Krebbel

These patch fix a couple of testsuite fails for IBM Z.

Bootstrapped and regression tested on s390x.

Committed to mainline

Andreas Krebbel (3):
  Wattributes.c testcase: Disable warning check for IBM Z.
  IBM Z: Use the dedicated NOP instructions for "nop"
  IBM Z: Fix vcond-shift testcase.

 gcc/config/s390/s390.c |   4 +-
 gcc/config/s390/s390.md|  17 ++-
 gcc/testsuite/c-c++-common/Wattributes.c   |   3 +-
 gcc/testsuite/gcc.dg/Wattributes-6.c   |   3 +-
 gcc/testsuite/gcc.target/s390/hotpatch-1.c |   7 +
 gcc/testsuite/gcc.target/s390/hotpatch-10.c|   1 +
 gcc/testsuite/gcc.target/s390/hotpatch-11.c|   1 +
 gcc/testsuite/gcc.target/s390/hotpatch-12.c|   1 +
 gcc/testsuite/gcc.target/s390/hotpatch-13.c|   1 +
 gcc/testsuite/gcc.target/s390/hotpatch-14.c|   1 +
 gcc/testsuite/gcc.target/s390/hotpatch-15.c|   1 +
 gcc/testsuite/gcc.target/s390/hotpatch-16.c|   1 +
 gcc/testsuite/gcc.target/s390/hotpatch-17.c|   1 +
 gcc/testsuite/gcc.target/s390/hotpatch-18.c|   1 +
 gcc/testsuite/gcc.target/s390/hotpatch-19.c|   1 +
 gcc/testsuite/gcc.target/s390/hotpatch-2.c |   1 +
 gcc/testsuite/gcc.target/s390/hotpatch-3.c |   1 +
 gcc/testsuite/gcc.target/s390/hotpatch-4.c |   1 +
 gcc/testsuite/gcc.target/s390/hotpatch-5.c |   1 +
 gcc/testsuite/gcc.target/s390/hotpatch-6.c |   1 +
 gcc/testsuite/gcc.target/s390/hotpatch-7.c |   1 +
 gcc/testsuite/gcc.target/s390/hotpatch-8.c |   1 +
 gcc/testsuite/gcc.target/s390/hotpatch-9.c |   1 +
 gcc/testsuite/gcc.target/s390/s390.exp |   2 +-
 gcc/testsuite/gcc.target/s390/vector/vcond-shift.c | 163 +
 25 files changed, 180 insertions(+), 37 deletions(-)

-- 
2.9.1

[PATCH 2/3] IBM Z: Use the dedicated NOP instructions for "nop"

2018-04-06 Thread Andreas Krebbel

We still use lr r0,r0 as a NOP instruction although we have some kind
of dedicated NOP instruction (nopr) which maps to a "branch never".

As a side-effect this fixes testcases scanning for NOPs
e.g. patchable_function_entry-*.

As another side-effect this makes it difficult to distingiush NOPs
generated for hotpatching from NOPs added when using -O0 to attach
location information to it.  Hence I had to make sure that the hotpatch
testcases get skipped when compiling without optimization.

gcc/ChangeLog:

2018-04-06  Andreas Krebbel  

* config/s390/s390.c (s390_z10_optimize_cmp): Expand dedicated NOP
instructions.
* config/s390/s390.md (UNSPECV_NOP_LR_0, UNSPECV_NOP_LR_1): New
constant definitions.
("nop"): lr 0,0 -> nopr r0
("nop_lr0", "nop_lr1"): New insn definitions.

gcc/testsuite/ChangeLog:

2018-04-06  Andreas Krebbel  

* gcc.target/s390/s390.exp: Remove -O0 from list of torture
options.
* gcc.target/s390/hotpatch-1.c: Skip when building without
optimization.
* gcc.target/s390/hotpatch-10.c: Likewise.
* gcc.target/s390/hotpatch-11.c: Likewise.
* gcc.target/s390/hotpatch-12.c: Likewise.
* gcc.target/s390/hotpatch-13.c: Likewise.
* gcc.target/s390/hotpatch-14.c: Likewise.
* gcc.target/s390/hotpatch-15.c: Likewise.
* gcc.target/s390/hotpatch-16.c: Likewise.
* gcc.target/s390/hotpatch-17.c: Likewise.
* gcc.target/s390/hotpatch-18.c: Likewise.
* gcc.target/s390/hotpatch-19.c: Likewise.
* gcc.target/s390/hotpatch-2.c: Likewise.
* gcc.target/s390/hotpatch-3.c: Likewise.
* gcc.target/s390/hotpatch-4.c: Likewise.
* gcc.target/s390/hotpatch-5.c: Likewise.
* gcc.target/s390/hotpatch-6.c: Likewise.
* gcc.target/s390/hotpatch-7.c: Likewise.
* gcc.target/s390/hotpatch-8.c: Likewise.
* gcc.target/s390/hotpatch-9.c: Likewise.
---
 gcc/config/s390/s390.c  |  4 ++--
 gcc/config/s390/s390.md | 17 +++--
 gcc/testsuite/gcc.target/s390/hotpatch-1.c  |  7 +++
 gcc/testsuite/gcc.target/s390/hotpatch-10.c |  1 +
 gcc/testsuite/gcc.target/s390/hotpatch-11.c |  1 +
 gcc/testsuite/gcc.target/s390/hotpatch-12.c |  1 +
 gcc/testsuite/gcc.target/s390/hotpatch-13.c |  1 +
 gcc/testsuite/gcc.target/s390/hotpatch-14.c |  1 +
 gcc/testsuite/gcc.target/s390/hotpatch-15.c |  1 +
 gcc/testsuite/gcc.target/s390/hotpatch-16.c |  1 +
 gcc/testsuite/gcc.target/s390/hotpatch-17.c |  1 +
 gcc/testsuite/gcc.target/s390/hotpatch-18.c |  1 +
 gcc/testsuite/gcc.target/s390/hotpatch-19.c |  1 +
 gcc/testsuite/gcc.target/s390/hotpatch-2.c  |  1 +
 gcc/testsuite/gcc.target/s390/hotpatch-3.c  |  1 +
 gcc/testsuite/gcc.target/s390/hotpatch-4.c  |  1 +
 gcc/testsuite/gcc.target/s390/hotpatch-5.c  |  1 +
 gcc/testsuite/gcc.target/s390/hotpatch-6.c  |  1 +
 gcc/testsuite/gcc.target/s390/hotpatch-7.c  |  1 +
 gcc/testsuite/gcc.target/s390/hotpatch-8.c  |  1 +
 gcc/testsuite/gcc.target/s390/hotpatch-9.c  |  1 +
 gcc/testsuite/gcc.target/s390/s390.exp  |  2 +-
 22 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 51adb0d..59f5de9 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -14376,9 +14376,9 @@ s390_z10_optimize_cmp (rtx_insn *insn)
  && s390_non_addr_reg_read_p (*op0, prev_insn))
{
  if (REGNO (*op1) == 0)
-   emit_insn_after (gen_nop1 (), insn);
+   emit_insn_after (gen_nop_lr1 (), insn);
  else
-   emit_insn_after (gen_nop (), insn);
+   emit_insn_after (gen_nop_lr0 (), insn);
  insn_added_p = true;
}
   else
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 5481f13..c4d391b 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -267,6 +267,10 @@
UNSPECV_CAS
UNSPECV_ATOMIC_OP
 
+   ; Non-branch nops used for compare-and-branch adjustments on z10
+   UNSPECV_NOP_LR_0
+   UNSPECV_NOP_LR_1
+
; Hotpatching (unremovable NOPs)
UNSPECV_NOP_2_BYTE
UNSPECV_NOP_4_BYTE
@@ -10998,12 +11002,21 @@
 (define_insn "nop"
   [(const_int 0)]
   ""
+  "nopr\t%%r0"
+  [(set_attr "op_type" "RR")])
+
+; non-branch NOPs required for optimizing compare-and-branch patterns
+; on z10
+
+(define_insn "nop_lr0"
+  [(unspec_volatile [(const_int 0)] UNSPECV_NOP_LR_0)]
+  ""
   "lr\t0,0"
   [(set_attr "op_type" "RR")
(set_attr "z10prop"  "z10_fr_E1")])
 
-(define_insn "nop1"
-  [(const_int 1)]
+(define_insn "nop_lr1"
+  [(unspec_volatile [(const_int 0)] UNSPECV_NOP_LR_1)]
   ""
   "lr\t1,1"
   [(set_attr "op_type" "RR")])
diff --git a/gcc/testsuite/gcc.target/s390/hotpatch-1.c 
b/gcc/testsuite/gcc.target/s390/hotpatch-1.c
index 5f0f2e1..67e101e 100644
--- a/gcc/testsuite/gcc.target/s390/hotpatch-1.c
+++ b/gcc/testsuite/gcc.target/s390/hotpatch-1.c
@@ -3,6 +3,13 @@
 /* { dg-do comp

[PATCH 1/3] Wattributes.c testcase: Disable warning check for IBM Z.

2018-04-06 Thread Andreas Krebbel

On IBM Z we enforce function alignment to 8 bytes.  Hence we get an
error instead of a warning when trying to specify smaller alignments.

gcc/testsuite/ChangeLog:

2018-04-06  Andreas Krebbel  

* c-c++-common/Wattributes.c: Disable warning for s390* target and
check for an error instead.
* gcc.dg/Wattributes-6.c: Likewise.
---
 gcc/testsuite/c-c++-common/Wattributes.c | 3 ++-
 gcc/testsuite/gcc.dg/Wattributes-6.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/c-c++-common/Wattributes.c 
b/gcc/testsuite/c-c++-common/Wattributes.c
index 902bcb6..40a2985 100644
--- a/gcc/testsuite/c-c++-common/Wattributes.c
+++ b/gcc/testsuite/c-c++-common/Wattributes.c
@@ -401,7 +401,8 @@ inline int ATTR ((warn_unused_result))
 finline_hot_noret_align (int);  /* { dg-warning "ignoring attribute 
.warn_unused_result. because it conflicts with attribute .noreturn." } */
 
 inline int ATTR ((aligned (4)))
-finline_hot_noret_align (int);  /* { dg-warning "ignoring attribute .aligned 
\\(4\\). because it conflicts with attribute .aligned \\(8\\)." } */
+  finline_hot_noret_align (int);  /* { dg-warning "ignoring attribute .aligned 
\\(4\\). because it conflicts with attribute .aligned \\(8\\)." "" { target { ! 
s390*-*-* } } } */
+/* { dg-error "alignment for '.*finline_hot_noret_align.*' must be at least 8" 
"" { target s390*-*-* } .-1 } */
 
 inline int ATTR ((aligned (8)))
 finline_hot_noret_align (int);
diff --git a/gcc/testsuite/gcc.dg/Wattributes-6.c 
b/gcc/testsuite/gcc.dg/Wattributes-6.c
index 902bcb6..a260d01 100644
--- a/gcc/testsuite/gcc.dg/Wattributes-6.c
+++ b/gcc/testsuite/gcc.dg/Wattributes-6.c
@@ -401,7 +401,8 @@ inline int ATTR ((warn_unused_result))
 finline_hot_noret_align (int);  /* { dg-warning "ignoring attribute 
.warn_unused_result. because it conflicts with attribute .noreturn." } */
 
 inline int ATTR ((aligned (4)))
-finline_hot_noret_align (int);  /* { dg-warning "ignoring attribute .aligned 
\\(4\\). because it conflicts with attribute .aligned \\(8\\)." } */
+  finline_hot_noret_align (int);  /* { dg-warning "ignoring attribute .aligned 
\\(4\\). because it conflicts with attribute .aligned \\(8\\)." "" { target { ! 
s390*-*-* } } } */
+/* { dg-error "alignment for 'finline_hot_noret_align' must be at least 8" "" 
{ target s390*-*-* } .-1 } */
 
 inline int ATTR ((aligned (8)))
 finline_hot_noret_align (int);
-- 
2.9.1

[PATCH 3/3] IBM Z: Fix vcond-shift testcase.

2018-04-06 Thread Andreas Krebbel

gcc/testsuite/ChangeLog:

2018-04-06  Andreas Krebbel  

* gcc.target/s390/vector/vcond-shift.c: Use the proper conditions
to trigger the optimization. Do some cleanup and function
renaming.  Add more test functions.
---
 gcc/testsuite/gcc.target/s390/vector/vcond-shift.c | 163 +
 1 file changed, 133 insertions(+), 30 deletions(-)

diff --git a/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c 
b/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
index cc40153..a6b4e97 100644
--- a/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
+++ b/gcc/testsuite/gcc.target/s390/vector/vcond-shift.c
@@ -3,71 +3,174 @@
 /* { dg-do compile { target { s390*-*-* } } } */
 /* { dg-options "-O3 -march=z13 -mzarch" } */
 
-/* { dg-final { scan-assembler "vesraf\t%v.?,%v.?,31" } } */
-/* { dg-final { scan-assembler "vesrah\t%v.?,%v.?,15" } } */
-/* { dg-final { scan-assembler "vesrab\t%v.?,%v.?,7" } } */
+/* { dg-final { scan-assembler-times "vesraf\t%v.?,%v.?,31" 6 } } */
+/* { dg-final { scan-assembler-times "vesrah\t%v.?,%v.?,15" 6 } } */
+/* { dg-final { scan-assembler-times "vesrab\t%v.?,%v.?,7" 6 } } */
 /* { dg-final { scan-assembler-not "vzero\t*" } } */
-/* { dg-final { scan-assembler "vesrlf\t%v.?,%v.?,31" } } */
-/* { dg-final { scan-assembler "vesrlh\t%v.?,%v.?,15" } } */
-/* { dg-final { scan-assembler "vesrlb\t%v.?,%v.?,7" } } */
+/* { dg-final { scan-assembler-times "vesrlf\t%v.?,%v.?,31" 4 } } */
+/* { dg-final { scan-assembler-times "vesrlh\t%v.?,%v.?,15" 4 } } */
+/* { dg-final { scan-assembler-times "vesrlb\t%v.?,%v.?,7" 4 } } */
 
-#define SZ 8
-#define SZ2 16
-#define SZ3 32
+/* Make it expand to two vector operations.  */
+#define ITER(X) (2 * (16 / sizeof (X[1])))
 
-void foo(int *w)
+void
+vesraf_div (int *x)
 {
   int i;
-  int *ww = __builtin_assume_aligned (w, 8);
+  int *xx = __builtin_assume_aligned (x, 8);
 
-  /* Should expand to (ww + (ww < 0 ? 1 : 0)) >> 1
- which in turn should get simplified to (ww + (ww >> 31)) >> 1.  */
-  for (i = 0; i < SZ; i++)
-ww[i] = ww[i] / 2;
+  /* Should expand to (xx + (xx < 0 ? 1 : 0)) >> 1
+ which in turn should get simplified to (xx + (xx >> 31)) >> 1.  */
+  for (i = 0; i < ITER (xx); i++)
+xx[i] = xx[i] / 2;
 }
 
-void foo2(short *w)
+void
+vesrah_div (short *x)
 {
   int i;
-  short *ww = __builtin_assume_aligned (w, 8);
+  short *xx = __builtin_assume_aligned (x, 8);
 
-  for (i = 0; i < SZ2; i++)
-ww[i] = ww[i] / 2;
+  for (i = 0; i < ITER (xx); i++)
+xx[i] = xx[i] / 2;
 }
 
 
-void foo3(signed char *w)
+void
+vesrab_div (signed char *x)
 {
   int i;
-  signed char *ww = __builtin_assume_aligned (w, 8);
+  signed char *xx = __builtin_assume_aligned (x, 8);
 
-  for (i = 0; i < SZ3; i++)
-ww[i] = ww[i] / 2;
+  for (i = 0; i < ITER (xx); i++)
+xx[i] = xx[i] / 2;
 }
 
-int baz(int *x)
+
+
+int
+vesraf_lt (int *x)
 {
   int i;
   int *xx = __builtin_assume_aligned (x, 8);
 
-  for (i = 0; i < SZ; i++)
+  for (i = 0; i < ITER (xx); i++)
 xx[i] = xx[i] < 0 ? -1 : 0;
 }
 
-int baf(short *x)
+int
+vesrah_lt (short *x)
 {
   int i;
   short *xx = __builtin_assume_aligned (x, 8);
 
-  for (i = 0; i < SZ2; i++)
-xx[i] = xx[i] >= 0 ? 0 : 1;
+  for (i = 0; i < ITER (xx); i++)
+xx[i] = xx[i] < 0 ? -1 : 0;
 }
 
-int bal(signed char *x)
+int
+vesrab_lt (signed char *x)
 {
   int i;
   signed char *xx = __builtin_assume_aligned (x, 8);
 
-  for (i = 0; i < SZ3; i++)
+  for (i = 0; i < ITER (xx); i++)
+xx[i] = xx[i] < 0 ? -1 : 0;
+}
+
+
+
+int
+vesraf_ge (int *x)
+{
+  int i;
+  int *xx = __builtin_assume_aligned (x, 8);
+
+  for (i = 0; i < ITER (xx); i++)
 xx[i] = xx[i] >= 0 ? 0 : -1;
 }
+
+int
+vesrah_ge (short *x)
+{
+  int i;
+  short *xx = __builtin_assume_aligned (x, 8);
+
+  for (i = 0; i < ITER (xx); i++)
+xx[i] = xx[i] >= 0 ? 0 : -1;
+}
+
+int
+vesrab_ge (signed char *x)
+{
+  int i;
+  signed char *xx = __builtin_assume_aligned (x, 8);
+
+  for (i = 0; i < ITER (xx); i++)
+xx[i] = xx[i] >= 0 ? 0 : -1;
+}
+
+
+
+int
+vesrlf_lt (int *x)
+{
+  int i;
+  int *xx = __builtin_assume_aligned (x, 8);
+
+  for (i = 0; i < ITER (xx); i++)
+xx[i] = xx[i] < 0 ? 1 : 0;
+}
+
+int
+vesrlh_lt (short *x)
+{
+  int i;
+  short *xx = __builtin_assume_aligned (x, 8);
+
+  for (i = 0; i < ITER (xx); i++)
+xx[i] = xx[i] < 0 ? 1 : 0;
+}
+
+int
+vesrlb_lt (signed char *x)
+{
+  int i;
+  signed char *xx = __builtin_assume_aligned (x, 8);
+
+  for (i = 0; i < ITER (xx); i++)
+xx[i] = xx[i] < 0 ? 1 : 0;
+}
+
+
+
+int
+vesrlf_ge (int *x)
+{
+  int i;
+  int *xx = __builtin_assume_aligned (x, 8);
+
+  for (i = 0; i < ITER (xx); i++)
+xx[i] = xx[i] >= 0 ? 0 : 1;
+}
+
+int
+vesrlh_ge (short *x)
+{
+  int i;
+  short *xx = __builtin_assume_aligned (x, 8);
+
+  for (i = 0; i < ITER (xx); i++)
+xx[i] = xx[i] >= 0 ? 0 : 1;
+}
+
+int
+vesrlb_ge (signed char *x)
+{
+  int i;
+  signed char *xx = __builtin_assume_aligned (x, 8);
+
+  for (i = 0; i < ITER (xx); i++)
+xx[i]

PR libstdc++/83860 avoid dangling references in valarray closure types

2018-04-06 Thread Jonathan Wakely

This attempts to solve some of the problems when mixing std::valarray
operations and 'auto', by storing nested closure objects as values
instead of references. This means we don't end up with dangling
references to temporary closures that have already been destroyed.

This makes the closure objects introduced by the library less
error-prone, but it's still possible to write incorrect code by using
temporary valarrays, e.g.

std::valarray f();
auto x = f() * 2;
std::valarray y = x;

Here the closure object 'x' has a dangling reference to the temporary
returned by f(). It might be possible to do something about this by
overloading the operators for valarray rvalues and moving the valarray
into the closure, instead of holding a const-reference. I don't plan
to work on that.

Performance seems to be unaffected by this patch, although I didn't
test that very thoroughly.

Strictly speaking this is an ABI change as it changes the size and
layout of the closure types like _BinClos etc. so that their _M_expr
member is at least two words, not just one for a reference. Those
closure types should never appear in API boundaries or as class
members (if anybody was doing that by using 'auto' it wouldn't have
worked correctly anyway) so I think we can just change them, without
renaming the types or moving them into an inline namespace so they
mangle differently. Does anybody disagree?

The PR is marked as a regression because the example in the PR used to
"work" with older releases. That's probably only because they didn't
optimize as aggressively and so the stack space of the expired
temporaries wasn't reused (ASan still shows the bug was there in older
releases even if it doesn't crash). As a regression this should be
backported, but the layout changes make me pause a little when
considering making the change on stable release branches.

PR libstdc++/83860
* include/bits/gslice_array.h (gslice_array): Define default
constructor as deleted, as per C++11 standard.
* include/bits/mask_array.h (mask_array): Likewise.
* include/bits/slice_array.h (slice_array): Likewise.
* include/bits/valarray_after.h (_GBase::_M_expr, _IBase::_M_expr):
Use _ValArrayRef for type of data members.
* include/bits/valarray_before.h (_ValArrayRef): New helper for type
of data members in closure objects.
(_FunBase::_M_expr, _UnBase::_M_expr, _BinBase::_M_expr1)
(_BinBase::_M_expr2, _BinBase2::_M_expr1, _BinBase1::_M_expr2)
(_SBase::_M_expr): Use _ValArrayRef for type of data members.
* testsuite/26_numerics/valarray/83860.cc: New.

I'll commit this to trunk only for now, unless anybody sees a problem
with the approach, or thinks the layout changes require new mangled
names for the closures.
commit 1dd9f0f54457eb2c44c6b1125800d94eaac0cb2f
Author: Jonathan Wakely 
Date:   Fri Apr 6 01:30:19 2018 +0100

PR libstdc++/83860 avoid dangling references in valarray closure types

PR libstdc++/83860
* include/bits/gslice_array.h (gslice_array): Define default
constructor as deleted, as per C++11 standard.
* include/bits/mask_array.h (mask_array): Likewise.
* include/bits/slice_array.h (slice_array): Likewise.
* include/bits/valarray_after.h (_GBase::_M_expr, _IBase::_M_expr):
Use _ValArrayRef for type of data members.
* include/bits/valarray_before.h (_ValArrayRef): New helper for type
of data members in closure objects.
(_FunBase::_M_expr, _UnBase::_M_expr, _BinBase::_M_expr1)
(_BinBase::_M_expr2, _BinBase2::_M_expr1, _BinBase1::_M_expr2)
(_SBase::_M_expr): Use _ValArrayRef for type of data members.
* testsuite/26_numerics/valarray/83860.cc: New.

diff --git a/libstdc++-v3/include/bits/gslice_array.h 
b/libstdc++-v3/include/bits/gslice_array.h
index 2da7e0442bb..715c53bd616 100644
--- a/libstdc++-v3/include/bits/gslice_array.h
+++ b/libstdc++-v3/include/bits/gslice_array.h
@@ -128,8 +128,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   gslice_array(_Array<_Tp>, const valarray&);
 
+#if __cplusplus < 201103L
   // not implemented
   gslice_array();
+#else
+public:
+  gslice_array() = delete;
+#endif
 };
 
   template
diff --git a/libstdc++-v3/include/bits/mask_array.h 
b/libstdc++-v3/include/bits/mask_array.h
index 84671cb43d6..c11691a243a 100644
--- a/libstdc++-v3/include/bits/mask_array.h
+++ b/libstdc++-v3/include/bits/mask_array.h
@@ -131,8 +131,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   const _Array _M_mask;
   const _Array<_Tp>  _M_array;
 
+#if __cplusplus < 201103L
   // not implemented
   mask_array();
+#else
+public:
+  mask_array() = delete;
+#endif
 };
 
   template
diff --git a/libstdc++-v3/include/bits/slice_array.h 
b/libstdc++-v3/include/bits/slice_array.h
index 05b096bb5a9..b025373180f 100644
--- a/libstdc++-v3/include/bits/slice_a

Re: PR libstdc++/83860 avoid dangling references in valarray closure types

2018-04-06 Thread Marc Glisse


On Fri, 6 Apr 2018, Jonathan Wakely wrote:


This attempts to solve some of the problems when mixing std::valarray
operations and 'auto', by storing nested closure objects as values
instead of references. This means we don't end up with dangling
references to temporary closures that have already been destroyed.

This makes the closure objects introduced by the library less
error-prone, but it's still possible to write incorrect code by using
temporary valarrays, e.g.

std::valarray f();
auto x = f() * 2;
std::valarray y = x;

Here the closure object 'x' has a dangling reference to the temporary
returned by f(). It might be possible to do something about this by
overloading the operators for valarray rvalues and moving the valarray
into the closure, instead of holding a const-reference. I don't plan
to work on that.

Performance seems to be unaffected by this patch, although I didn't
test that very thoroughly.

Strictly speaking this is an ABI change as it changes the size and
layout of the closure types like _BinClos etc. so that their _M_expr
member is at least two words, not just one for a reference. Those
closure types should never appear in API boundaries or as class
members (if anybody was doing that by using 'auto' it wouldn't have
worked correctly anyway) so I think we can just change them, without
renaming the types or moving them into an inline namespace so they
mangle differently. Does anybody disagree?


When I was looking into doing something similar for GMP, I had decided to 
rename the class. Some inline functions have incompatible behavior before 
and after the change, and if they are not actually inlined and you mix the 
2 types of objects (through a library for instance) without a lot of care 
on symbol visibility, a single version will be used for both.


If you think that's too contrived a scenario, then renaming may not be 
needed.



The PR is marked as a regression because the example in the PR used to
"work" with older releases. That's probably only because they didn't
optimize as aggressively and so the stack space of the expired
temporaries wasn't reused (ASan still shows the bug was there in older
releases even if it doesn't crash). As a regression this should be
backported, but the layout changes make me pause a little when
considering making the change on stable release branches.


I wouldn't count it as a regression.

--
Marc Glisse

[PATCH 1/5] [ARC] Update sleep builtin.

2018-04-06 Thread Claudiu Zissulescu

From: claziss 

gcc/
2017-05-09  Claudiu Zissulescu  

* config/arc/arc-protos.h (check_if_valid_sleep_operand): Remove.
* config/arc/arc.c (arc_expand_builtin): Sleep accepts registers
and short u6 immediate.
(check_if_valid_sleep_operand): Remove.
* config/arc/arc.md (Sleep): Accepts registers and u6 immediates.
---
 gcc/config/arc/arc-protos.h |  1 -
 gcc/config/arc/arc.c| 26 --
 gcc/config/arc/arc.md   |  4 ++--
 3 files changed, 2 insertions(+), 29 deletions(-)

diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
index 75cfeda..0ba6871 100644
--- a/gcc/config/arc/arc-protos.h
+++ b/gcc/config/arc/arc-protos.h
@@ -59,7 +59,6 @@ void arc_asm_output_aligned_decl_local (FILE *, tree, const 
char *,
unsigned HOST_WIDE_INT);
 extern rtx arc_return_addr_rtx (int , rtx);
 extern bool check_if_valid_regno_const (rtx *, int);
-extern bool check_if_valid_sleep_operand (rtx *, int);
 extern bool arc_legitimate_constant_p (machine_mode, rtx);
 extern bool arc_legitimate_pic_addr_p (rtx);
 extern bool arc_raw_symbolic_reference_mentioned_p (rtx, bool);
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 3564696..47d3ba4 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -6573,11 +6573,6 @@ arc_expand_builtin (tree exp,
   fold (arg0);
   op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
 
-  if  (!CONST_INT_P (op0) || !satisfies_constraint_L (op0))
-   {
- error ("builtin operand should be an unsigned 6-bit value");
- return NULL_RTX;
-   }
   gcc_assert (icode != 0);
   emit_insn (GEN_FCN (icode) (op0));
   return NULL_RTX;
@@ -6925,27 +6920,6 @@ check_if_valid_regno_const (rtx *operands, int opno)
   return false;
 }
 
-/* Check that after all the constant folding, whether the operand to
-   __builtin_arc_sleep is an unsigned int of 6 bits.  If not, flag an error.  
*/
-
-bool
-check_if_valid_sleep_operand (rtx *operands, int opno)
-{
-  switch (GET_CODE (operands[opno]))
-{
-case CONST :
-case CONST_INT :
-   if( UNSIGNED_INT6 (INTVAL (operands[opno])))
-   return true;
-/* FALLTHRU */
-default:
-   fatal_error (input_location,
-"operand for sleep instruction must be an unsigned 6 bit 
compile-time constant");
-   break;
-}
-  return false;
-}
-
 /* Return true if it is ok to make a tail-call to DECL.  */
 
 static bool
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index fb34329..2ec2b48 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -4794,9 +4794,9 @@ archs4x, archs4xd, archs4xd_slow"
 
 
 (define_insn "sleep"
-  [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "L")]
+  [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "Lr")]
   VUNSPEC_ARC_SLEEP)]
-  "check_if_valid_sleep_operand(operands,0)"
+  ""
   "sleep %0"
   [(set_attr "length" "4")
   (set_attr "type" "misc")])
-- 
1.9.1

[PATCH 2/5] [ARC] Fix FLS, SETI patterns.

2018-04-06 Thread Claudiu Zissulescu

From: claziss 

Claudiu Zissulescu  

* config/arc/arc.md ("vunspec"): Delete it, unify all the unspec
enums into a single definition.
(fls): Fix predicates and printing.
(seti): Likewise.
---
 gcc/config/arc/arc.md | 14 +-
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 2ec2b48..ffd9d5b 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -137,9 +137,7 @@
   UNSPEC_ARC_VMPY2H
   UNSPEC_ARC_VMPY2HU
   UNSPEC_ARC_STKTIE
-  ])
 
-(define_c_enum "vunspec" [
   VUNSPEC_ARC_RTIE
   VUNSPEC_ARC_SYNC
   VUNSPEC_ARC_BRK
@@ -5818,21 +5816,19 @@ archs4x, archs4xd, archs4xd_slow"
})
 
 (define_insn "fls"
-  [(set (match_operand:SI  0 "dest_reg_operand" "=w,w")
-   (unspec:SI [(match_operand:SI 1 "general_operand" "cL,Cal")]
+  [(set (match_operand:SI  0 "register_operand" "=r,r")
+   (unspec:SI [(match_operand:SI 1 "nonmemory_operand" "rL,Cal")]
UNSPEC_ARC_FLS))]
   "TARGET_NORM && TARGET_V2"
-  "@
-   fls \t%0, %1
-   fls \t%0, %1"
+  "fls\\t%0,%1"
   [(set_attr "length" "4,8")
(set_attr "type" "two_cycle_core,two_cycle_core")])
 
 (define_insn "seti"
-  [(unspec_volatile:SI [(match_operand:SI 0 "general_operand" "rL")]
+  [(unspec_volatile:SI [(match_operand:SI 0 "nonmemory_operand" "rL")]
   VUNSPEC_ARC_SETI)]
   "TARGET_V2"
-  "seti  %0"
+  "seti\\t%0"
   [(set_attr "length" "4")
(set_attr "type" "misc")])
 
-- 
1.9.1

[PATCH 5/5] [ARC] Clear the instruction cache using syscalls.

2018-04-06 Thread Claudiu Zissulescu

Clear the instruction cache from `beg' to `end'.  This makes an inline
system call to SYS_cacheflush.

gcc/
2017-03-28  Claudiu Zissulescu  

* config/arc/linux.h (CLEAR_INSN_CACHE): Define.
---
 gcc/config/arc/linux.h | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/gcc/config/arc/linux.h b/gcc/config/arc/linux.h
index 4e87dfe..96d548e 100644
--- a/gcc/config/arc/linux.h
+++ b/gcc/config/arc/linux.h
@@ -109,3 +109,17 @@ along with GCC; see the file COPYING3.  If not see
 /* Build attribute: procedure call standard.  */
 #undef ATTRIBUTE_PCS
 #define ATTRIBUTE_PCS 3
+
+/* Clear the instruction cache from `beg' to `end'.  This makes an
+   inline system call to SYS_cacheflush.  */
+#undef CLEAR_INSN_CACHE
+#define CLEAR_INSN_CACHE(beg, end) \
+{  \
+  register unsigned long _beg __asm ("r0") = (unsigned long) (beg);\
+  register unsigned long _end __asm ("r1") = (unsigned long) (end);\
+  register unsigned long _xtr __asm ("r2") = 0;
\
+  register unsigned long _scno __asm ("r8") = 244; \
+  __asm __volatile ("trap_s 0  ; sys_cache_sync"   \
+   : "=r" (_beg)   \
+   : "0" (_beg), "r" (_end), "r" (_xtr), "r" (_scno)); \
+}
-- 
1.9.1

[PATCH 4/5] [ARC] Cleanup sdata handling.

2018-04-06 Thread Claudiu Zissulescu

From: Claudiu Zissulescu 

Clean up how we handle small data load/store operations. This patch clears 
-flto-fat-lto-object LTO related errors.

gcc/
2018-01-18  Claudiu Zissulescu  

* config/arc/arc-protos.h (prepare_extend_operands): Remove.
(small_data_pattern): Likewise.
(arc_rewrite_small_data): Likewise.
* config/arc/arc.c (LEGITIMATE_SMALL_DATA_OFFSET_P): Remove.
(LEGITIMATE_SMALL_DATA_ADDRESS_P): Likewise.
(get_symbol_alignment): New function.
(legitimate_small_data_address_p): Likewise.
(legitimate_scaled_address): Update, call
legitimate_small_data_address_p.
(output_sdata): New static variable.
(arc_print_operand): Update how we handle small data operands.
(arc_print_operand_address): Likewise.
(arc_legitimate_address_p): Update, use
legitimate_small_data_address_p.
(arc_rewrite_small_data_p): Remove.
(arc_rewrite_small_data_1): Likewise.
(arc_rewrite_small_data): Likewise.
(small_data_pattern): Likewise.
(compact_sda_memory_operand): Update to use
legitimate_small_data_address_p and get_symbol_alignment.
(prepare_move_operands): Don't rewite sdata pattern.
(prepare_extend_operands): Remove.
* config/arc/arc.md (zero_extendqihi2): Don't rewrite sdata
pattern.
(zero_extendqisi2): Likewise.
(zero_extendhisi2): Likewise.
(extendqihi2): Likewise.
(extendqisi2): Likewise.
(extendhisi2): Likewise.
(addsi3): Likewise.
(subsi3): Likewise.
(andsi3): Likewise.
* config/arc/constraints.md (Usd): Change it to memory constraint.

gcc/testsuite
2018-01-18  Claudiu Zissulescu  

* gcc.target/arc/interrupt-8.c: Update test.
* gcc.target/arc/loop-4.c: Likewise.
* gcc.target/arc/loop-hazard-1.c: Likewise.
* gcc.target/arc/sdata-3.c: Likewise.
---
 gcc/config/arc/arc-protos.h  |   4 -
 gcc/config/arc/arc.c | 309 ---
 gcc/config/arc/arc.md|  22 +-
 gcc/config/arc/constraints.md|   6 +-
 gcc/testsuite/gcc.target/arc/interrupt-8.c   |   5 +-
 gcc/testsuite/gcc.target/arc/loop-4.c|   2 +-
 gcc/testsuite/gcc.target/arc/loop-hazard-1.c |   2 +-
 gcc/testsuite/gcc.target/arc/sdata-3.c   |   8 +-
 8 files changed, 110 insertions(+), 248 deletions(-)

diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
index 0ba6871..67f3b4e 100644
--- a/gcc/config/arc/arc-protos.h
+++ b/gcc/config/arc/arc-protos.h
@@ -33,8 +33,6 @@ extern void arc_print_operand (FILE *, rtx, int);
 extern void arc_print_operand_address (FILE *, rtx);
 extern void arc_final_prescan_insn (rtx_insn *, rtx *, int);
 extern const char *arc_output_libcall (const char *);
-extern bool prepare_extend_operands (rtx *operands, enum rtx_code code,
-machine_mode omode);
 extern int arc_output_addsi (rtx *operands, bool, bool);
 extern int arc_output_commutative_cond_exec (rtx *operands, bool);
 extern bool arc_expand_movmem (rtx *operands);
@@ -65,8 +63,6 @@ extern bool arc_raw_symbolic_reference_mentioned_p (rtx, 
bool);
 extern bool arc_is_longcall_p (rtx);
 extern bool arc_is_shortcall_p (rtx);
 extern bool valid_brcc_with_delay_p (rtx *);
-extern bool small_data_pattern (rtx , machine_mode);
-extern rtx arc_rewrite_small_data (rtx);
 extern bool arc_ccfsm_cond_exec_p (void);
 struct secondary_reload_info;
 extern int arc_register_move_cost (machine_mode, enum reg_class,
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 2ccdce8..2ce1744 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -96,22 +96,6 @@ HARD_REG_SET overrideregs;
  ? 0 \
  : -(-GET_MODE_SIZE (MODE) | -4) >> 1)))
 
-#define LEGITIMATE_SMALL_DATA_OFFSET_P(X)  \
-  (GET_CODE (X) == CONST   \
-   && GET_CODE (XEXP ((X), 0)) == PLUS \
-   && GET_CODE (XEXP (XEXP ((X), 0), 0)) == SYMBOL_REF \
-   && SYMBOL_REF_SMALL_P (XEXP (XEXP ((X), 0), 0)) \
-   && GET_CODE (XEXP(XEXP ((X), 0), 1)) == CONST_INT   \
-   && INTVAL (XEXP (XEXP ((X), 0), 1)) <= g_switch_value)
-
-#define LEGITIMATE_SMALL_DATA_ADDRESS_P(X) \
-  (GET_CODE (X) == PLUS
\
- && REG_P (XEXP ((X), 0))  \
- && REGNO (XEXP ((X), 0)) == SDATA_BASE_REGNUM \
- && ((GET_CODE (XEXP ((X), 1)) == SYMBOL_REF   \
-   && SYMBOL_REF_SMALL_P (XEXP ((X), 1)))  \
-|| LEGITIMATE_SMALL_DATA_OFFSET_P (XEXP ((X), 1
-
 /* Array of valid operand punctuation characters.  */
 char

[C++ Patch] PR 85227 ("[7/8/ Regression] ICE with structured binding of a forward declared variable")

2018-04-06 Thread Paolo Carlini


Hi,

here, for an incomplete type we ICE pretty soon in 
find_decomp_class_base. Comparing to other cases too, I convinced myself 
that trying to complete the type is Ok. Also, it seems that in these 
functions we want to talk about structured binding and use an 
appropriate location, thus no complete_type_or_maybe_complain. Tested 
x86_64-linux.


Thanks, Paolo.

/

/cp
2018-04-06  Paolo Carlini  

PR c++/85227
* decl.c (find_decomp_class_base): Try to complete the type
and give an error if that fails.

/testsuite
2018-04-06  Paolo Carlini  

PR c++/85227
* g++.dg/cpp1z/decomp42.C: New.
Index: cp/decl.c
===
--- cp/decl.c   (revision 259156)
+++ cp/decl.c   (working copy)
@@ -7310,6 +7310,13 @@ cp_finish_decl (tree decl, tree init, bool init_co
 static tree
 find_decomp_class_base (location_t loc, tree type, tree ret)
 {
+  if (!COMPLETE_TYPE_P (complete_type (type)))
+{
+  error_at (loc, "structured binding refers to incomplete class type %qT",
+   type);
+  return error_mark_node;
+}
+
   bool member_seen = false;
   for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
 if (TREE_CODE (field) != FIELD_DECL
Index: testsuite/g++.dg/cpp1z/decomp42.C
===
--- testsuite/g++.dg/cpp1z/decomp42.C   (nonexistent)
+++ testsuite/g++.dg/cpp1z/decomp42.C   (working copy)
@@ -0,0 +1,10 @@
+// PR c++/85227
+// { dg-do compile { target c++11 } }
+// { dg-options "" }
+
+extern struct A a;
+
+template void foo()
+{
+  auto[i] = a;  // { dg-error "incomplete" }
+}  // { dg-warning "structured bindings only available with -std=c..17 or 
-std=gnu..17" "" { target c++14_down } .-1 }

[PATCH 3/5] [ARC] Update movhi and movdi patterns.

2018-04-06 Thread Claudiu Zissulescu

From: Claudiu Zissulescu 

Allow signed 6-bit short immediates into st[d] instructions.

2017-10-19  Claudiu Zissulescu  

* config/arc/arc.c (arc_split_move): Allow signed 6-bit constants
as source of std instructions.
* config/arc/arc.md (movsi_insn): Update pattern predicate to
allow 6-bit constants as source for store instructions.
(movdi_insn): Update instruction pattern to allow 6-bit constants
as source for store instructions.

testsuite/
2017-10-19  Claudiu Zissulescu  

* gcc.target/arc/store-merge-1.c: New test.
* gcc.target/arc/add_n-combine.c: Update test.
---
 gcc/config/arc/arc.c |  3 ++-
 gcc/config/arc/arc.md| 25 +
 gcc/testsuite/gcc.target/arc/add_n-combine.c |  2 +-
 gcc/testsuite/gcc.target/arc/store-merge-1.c | 17 +
 4 files changed, 33 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arc/store-merge-1.c

diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 47d3ba4..2ccdce8 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -9669,7 +9669,8 @@ arc_split_move (rtx *operands)
 
   if (TARGET_LL64
   && ((memory_operand (operands[0], mode)
-  && even_register_operand (operands[1], mode))
+  && (even_register_operand (operands[1], mode)
+  || satisfies_constraint_Cm3 (operands[1])))
  || (memory_operand (operands[1], mode)
  && even_register_operand (operands[0], mode
 {
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index ffd9d5b..0fc7aba 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -740,7 +740,9 @@ archs4x, archs4xd, archs4xd_slow"
/* Don't use a LIMM that we could load with a single insn - we loose
  delay-slot filling opportunities.  */
&& !satisfies_constraint_I (operands[1])
-   && satisfies_constraint_Usc (operands[0]))"
+   && satisfies_constraint_Usc (operands[0]))
+   || (satisfies_constraint_Cm3 (operands[1])
+  && memory_operand (operands[0], SImode))"
   "@
mov%? %0,%1%&   ;0
mov%? %0,%1%&   ;1
@@ -1237,10 +1239,12 @@ archs4x, archs4xd, archs4xd_slow"
   ")
 
 (define_insn_and_split "*movdi_insn"
-  [(set (match_operand:DI 0 "move_dest_operand"  "=w, w,r,m")
-   (match_operand:DI 1 "move_double_src_operand" "c,Hi,m,c"))]
+  [(set (match_operand:DI 0 "move_dest_operand"  "=w, w,r,   m")
+   (match_operand:DI 1 "move_double_src_operand" "c,Hi,m,cCm3"))]
   "register_operand (operands[0], DImode)
-   || register_operand (operands[1], DImode)"
+   || register_operand (operands[1], DImode)
+   || (satisfies_constraint_Cm3 (operands[1])
+  && memory_operand (operands[0], DImode))"
   "*
 {
   switch (which_alternative)
@@ -1250,19 +1254,16 @@ archs4x, archs4xd, archs4xd_slow"
 
 case 2:
 if (TARGET_LL64
-   && ((even_register_operand (operands[0], DImode)
-&& memory_operand (operands[1], DImode))
-   || (memory_operand (operands[0], DImode)
-   && even_register_operand (operands[1], DImode
+&& memory_operand (operands[1], DImode)
+   && even_register_operand (operands[0], DImode))
   return \"ldd%U1%V1 %0,%1%&\";
 return \"#\";
 
 case 3:
 if (TARGET_LL64
-   && ((even_register_operand (operands[0], DImode)
-&& memory_operand (operands[1], DImode))
-   || (memory_operand (operands[0], DImode)
-   && even_register_operand (operands[1], DImode
+   && memory_operand (operands[0], DImode)
+   && (even_register_operand (operands[1], DImode)
+   || satisfies_constraint_Cm3 (operands[1])))
  return \"std%U0%V0 %1,%0\";
 return \"#\";
 }
diff --git a/gcc/testsuite/gcc.target/arc/add_n-combine.c 
b/gcc/testsuite/gcc.target/arc/add_n-combine.c
index db6454f..cd32ed3 100644
--- a/gcc/testsuite/gcc.target/arc/add_n-combine.c
+++ b/gcc/testsuite/gcc.target/arc/add_n-combine.c
@@ -45,4 +45,4 @@ void f() {
   a(at3.bn[bu]);
 }
 
-/* { dg-final { scan-rtl-dump-times "\\*add_n" 3 "combine" } } */
+/* { dg-final { scan-rtl-dump-times "\\*add_n" 2 "combine" } } */
diff --git a/gcc/testsuite/gcc.target/arc/store-merge-1.c 
b/gcc/testsuite/gcc.target/arc/store-merge-1.c
new file mode 100644
index 000..4bb8dcb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/store-merge-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+/* This tests checks if we use st w6,[reg] format.  */
+
+typedef struct {
+  unsigned long __val[2];
+} sigset_t;
+
+int sigemptyset2 (sigset_t *set)
+{
+  set->__val[0] = 0;
+  set->__val[1] = 0;
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "st 0,\\\[r" 2 } } */
-- 
1.9.1

[PATCH 0/5] [ARC] General fixes

2018-04-06 Thread Claudiu Zissulescu

From: claziss 

Hi Andrew,

Please find a the following patches which are fixing a number of issues in ARC 
toolchain.

//Claudiu

  [ARC] Update movhi and movdi patterns.
This patch allows st w6,[b, s9] instructions to be used.

  [ARC] Cleanup sdata handling.
This cleanup is required to fix a number of lto issues.

  [ARC] Clear the instruction cache using syscalls.
Required by linux like systems.

  [ARC] Update sleep builtin.
Update the sleep builtin.

  [ARC] Fix FLS, SETI patterns.
Likewise.

 gcc/config/arc/arc-protos.h  |   5 -
 gcc/config/arc/arc.c | 338 ---
 gcc/config/arc/arc.md|  65 +++---
 gcc/config/arc/constraints.md|   6 +-
 gcc/config/arc/linux.h   |  14 ++
 gcc/testsuite/gcc.target/arc/add_n-combine.c |   2 +-
 gcc/testsuite/gcc.target/arc/interrupt-8.c   |   5 +-
 gcc/testsuite/gcc.target/arc/loop-4.c|   2 +-
 gcc/testsuite/gcc.target/arc/loop-hazard-1.c |   2 +-
 gcc/testsuite/gcc.target/arc/sdata-3.c   |   8 +-
 gcc/testsuite/gcc.target/arc/store-merge-1.c |  17 ++
 11 files changed, 164 insertions(+), 300 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arc/store-merge-1.c

-- 
1.9.1

[PATCH] Mitigate PR85222 a bit

2018-04-06 Thread Richard Biener


The following allows people configuring the gcc-4 compatible ABI
as the default ABI to retain compatibility with old programs
catching ios_base::failure by matching the abi version thrown
to the configured default ABI.

This doesn't really fix the PR but it makes behavior between
the dual-ABI with default gcc-4 compatible consistent with that
of the non-dual-ABI which is what I had expected.

Whether an ABI break is really desired for the case of a c++11
default ABI is still questionable and any programs that differ
from the default ABI choice will now be broken (compared to
those differing from c++11).

Bootstrapped on x86_64-unknown-linux-gnu, ok for trunk?  Ok for
the GCC 7 branch?

I'm not sure if we want to revert r245167 after this?  I'm
re-running the testsuite with a gcc4-compatible ABI right now.
At least with a "real" fix we should be able to run the affected
tests twice, once with the new and once with the old ABI.

Thanks,
Richard.

2018-04-06  Richard Biener  

PR libstdc++/85222
* src/c++11/ios.cc: Remove hard define of _GLIBCXX_USE_CXX11_ABI to 1.
Instead use the configured default ABI to decide the ABI version
of ios_base::failure thrown by __throw_ios_failure.

Index: libstdc++-v3/src/c++11/ios.cc
===
--- libstdc++-v3/src/c++11/ios.cc   (revision 258812)
+++ libstdc++-v3/src/c++11/ios.cc   (working copy)
@@ -26,9 +26,8 @@
 // ISO C++ 14882: 27.4  Iostreams base classes
 //
 
-// Determines the version of ios_base::failure thrown by __throw_ios_failure.
-// If !_GLIBCXX_USE_DUAL_ABI this will get undefined automatically.
-#define _GLIBCXX_USE_CXX11_ABI 1
+// The ABI version of ios_base::failure thrown by __throw_ios_failure
+// is determined by the default ABI version choosed at configure time
 
 #include 
 #include

Re: [PATCH] Fix ICE with statement frontiers (PR sanitizer/85213)

2018-04-06 Thread Richard Biener

On Thu, 5 Apr 2018, Jakub Jelinek wrote:

> Hi!
> 
> As mentioned in the PR, on the following testcase we cp_save_expr in order
> to avoid evaluating -1 * __builtin_expect (({ ... }), ...) twice and use
> the SAVE_EXPR in the original shift as well as in a comparison, but then
> we attempt to optimize the comparison by grabbing parts of the SAVE_EXPR,
> create comparison out of it and building another SAVE_EXPR around it.
> As unshare_expr/mostly_copy_tree_r doesn't unshare STATEMENT_LISTs, we end
> up with the STATEMENT_LIST containing DEBUG_BEGIN_STMT and x != 0 multiple
> times in the IL and as gimplification is destructive, we destroy the
> STATEMENT_LIST the first time and second time ICE on it.
> 
> I don't see other way how to resolve this than to avoid this weirdo
> optimization, I believe the optimization is from pre-GIMPLE era where we
> didn't have hundreds of passes that can optimize it before expansion.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, haven't seen any
> regressions, ok for trunk?

OK.

Thanks,
Richard.

> 2018-04-05  Jakub Jelinek  
> 
>   PR sanitizer/85213
>   * fold-const.c (twoval_comparison_p): Remove SAVE_P argument and don't
>   look through SAVE_EXPRs with non-side-effects argument.  Adjust
>   recursive calls.
>   (fold_comparison): Adjust twoval_comparison_p caller, don't handle
>   save_p here.
> 
>   * c-c++-common/ubsan/pr85213.c: New test.
> 
> --- gcc/fold-const.c.jj   2018-03-29 12:37:23.887476367 +0200
> +++ gcc/fold-const.c  2018-04-05 11:30:28.996962481 +0200
> @@ -115,7 +115,7 @@ static tree negate_expr (tree);
>  static tree associate_trees (location_t, tree, tree, enum tree_code, tree);
>  static enum comparison_code comparison_to_compcode (enum tree_code);
>  static enum tree_code compcode_to_comparison (enum comparison_code);
> -static int twoval_comparison_p (tree, tree *, tree *, int *);
> +static int twoval_comparison_p (tree, tree *, tree *);
>  static tree eval_subst (location_t, tree, tree, tree, tree, tree);
>  static tree optimize_bit_field_compare (location_t, enum tree_code,
>   tree, tree, tree);
> @@ -3549,13 +3549,12 @@ operand_equal_for_comparison_p (tree arg
> two different values, which will be stored in *CVAL1 and *CVAL2; if
> they are nonzero it means that some operands have already been found.
> No variables may be used anywhere else in the expression except in the
> -   comparisons.  If SAVE_P is true it means we removed a SAVE_EXPR around
> -   the expression and save_expr needs to be called with CVAL1 and CVAL2.
> +   comparisons.  
>  
> If this is true, return 1.  Otherwise, return zero.  */
>  
>  static int
> -twoval_comparison_p (tree arg, tree *cval1, tree *cval2, int *save_p)
> +twoval_comparison_p (tree arg, tree *cval1, tree *cval2)
>  {
>enum tree_code code = TREE_CODE (arg);
>enum tree_code_class tclass = TREE_CODE_CLASS (code);
> @@ -3568,39 +3567,23 @@ twoval_comparison_p (tree arg, tree *cva
>  || code == COMPOUND_EXPR))
>  tclass = tcc_binary;
>  
> -  else if (tclass == tcc_expression && code == SAVE_EXPR
> -&& ! TREE_SIDE_EFFECTS (TREE_OPERAND (arg, 0)))
> -{
> -  /* If we've already found a CVAL1 or CVAL2, this expression is
> -  two complex to handle.  */
> -  if (*cval1 || *cval2)
> - return 0;
> -
> -  tclass = tcc_unary;
> -  *save_p = 1;
> -}
> -
>switch (tclass)
>  {
>  case tcc_unary:
> -  return twoval_comparison_p (TREE_OPERAND (arg, 0), cval1, cval2, 
> save_p);
> +  return twoval_comparison_p (TREE_OPERAND (arg, 0), cval1, cval2);
>  
>  case tcc_binary:
> -  return (twoval_comparison_p (TREE_OPERAND (arg, 0), cval1, cval2, 
> save_p)
> -   && twoval_comparison_p (TREE_OPERAND (arg, 1),
> -   cval1, cval2, save_p));
> +  return (twoval_comparison_p (TREE_OPERAND (arg, 0), cval1, cval2)
> +   && twoval_comparison_p (TREE_OPERAND (arg, 1), cval1, cval2));
>  
>  case tcc_constant:
>return 1;
>  
>  case tcc_expression:
>if (code == COND_EXPR)
> - return (twoval_comparison_p (TREE_OPERAND (arg, 0),
> -  cval1, cval2, save_p)
> - && twoval_comparison_p (TREE_OPERAND (arg, 1),
> - cval1, cval2, save_p)
> - && twoval_comparison_p (TREE_OPERAND (arg, 2),
> - cval1, cval2, save_p));
> + return (twoval_comparison_p (TREE_OPERAND (arg, 0), cval1, cval2)
> + && twoval_comparison_p (TREE_OPERAND (arg, 1), cval1, cval2)
> + && twoval_comparison_p (TREE_OPERAND (arg, 2), cval1, cval2));
>return 0;
>  
>  case tcc_comparison:
> @@ -8781,9 +8764,8 @@ fold_comparison (location_t loc, enum tr
>if (TREE_CODE (arg1) == INTEGER_CST && TREE_CODE (arg0) != INTEGER_CST)
>  {
>tree cva

Re: [AARCH64] Neon vld1__x3, vst1__x2 and vst1_*_x3 intrinsics

2018-04-06 Thread Sameera Deshpande

Hi Christophe,

Please find attached the updated patch with testcases.

Ok for trunk?

- Thanks and regards,
  Sameera D.

2017-12-14 22:17 GMT+05:30 Christophe Lyon :
> 2017-12-14 9:29 GMT+01:00 Sameera Deshpande :
>> Hi!
>>
>> Please find attached the patch implementing vld1_*_x3, vst1_*_x2 and
>> vst1_*_x3 intrinsics as defined by Neon document.
>>
>> Ok for trunk?
>>
>> - Thanks and regards,
>>   Sameera D.
>>
>> gcc/Changelog:
>>
>> 2017-11-14  Sameera Deshpande  
>>
>>
>> * config/aarch64/aarch64-simd-builtins.def (ld1x3): New.
>> (st1x2): Likewise.
>> (st1x3): Likewise.
>> * config/aarch64/aarch64-simd.md
>> (aarch64_ld1x3): New pattern.
>> (aarch64_ld1_x3_): Likewise
>> (aarch64_st1x2): Likewise
>> (aarch64_st1_x2_): Likewise
>> (aarch64_st1x3): Likewise
>> (aarch64_st1_x3_): Likewise
>> * config/aarch64/arm_neon.h (vld1_u8_x3): New function.
>> (vld1_s8_x3): Likewise.
>> (vld1_u16_x3): Likewise.
>> (vld1_s16_x3): Likewise.
>> (vld1_u32_x3): Likewise.
>> (vld1_s32_x3): Likewise.
>> (vld1_u64_x3): Likewise.
>> (vld1_s64_x3): Likewise.
>> (vld1_fp16_x3): Likewise.
>> (vld1_f32_x3): Likewise.
>> (vld1_f64_x3): Likewise.
>> (vld1_p8_x3): Likewise.
>> (vld1_p16_x3): Likewise.
>> (vld1_p64_x3): Likewise.
>> (vld1q_u8_x3): Likewise.
>> (vld1q_s8_x3): Likewise.
>> (vld1q_u16_x3): Likewise.
>> (vld1q_s16_x3): Likewise.
>> (vld1q_u32_x3): Likewise.
>> (vld1q_s32_x3): Likewise.
>> (vld1q_u64_x3): Likewise.
>> (vld1q_s64_x3): Likewise.
>> (vld1q_f16_x3): Likewise.
>> (vld1q_f32_x3): Likewise.
>> (vld1q_f64_x3): Likewise.
>> (vld1q_p8_x3): Likewise.
>> (vld1q_p16_x3): Likewise.
>> (vld1q_p64_x3): Likewise.
>> (vst1_s64_x2): Likewise.
>> (vst1_u64_x2): Likewise.
>> (vst1_f64_x2): Likewise.
>> (vst1_s8_x2): Likewise.
>> (vst1_p8_x2): Likewise.
>> (vst1_s16_x2): Likewise.
>> (vst1_p16_x2): Likewise.
>> (vst1_s32_x2): Likewise.
>> (vst1_u8_x2): Likewise.
>> (vst1_u16_x2): Likewise.
>> (vst1_u32_x2): Likewise.
>> (vst1_f16_x2): Likewise.
>> (vst1_f32_x2): Likewise.
>> (vst1_p64_x2): Likewise.
>> (vst1q_s8_x2): Likewise.
>> (vst1q_p8_x2): Likewise.
>> (vst1q_s16_x2): Likewise.
>> (vst1q_p16_x2): Likewise.
>> (vst1q_s32_x2): Likewise.
>> (vst1q_s64_x2): Likewise.
>> (vst1q_u8_x2): Likewise.
>> (vst1q_u16_x2): Likewise.
>> (vst1q_u32_x2): Likewise.
>> (vst1q_u64_x2): Likewise.
>> (vst1q_f16_x2): Likewise.
>> (vst1q_f32_x2): Likewise.
>> (vst1q_f64_x2): Likewise.
>> (vst1q_p64_x2): Likewise.
>> (vst1_s64_x3): Likewise.
>> (vst1_u64_x3): Likewise.
>> (vst1_f64_x3): Likewise.
>> (vst1_s8_x3): Likewise.
>> (vst1_p8_x3): Likewise.
>> (vst1_s16_x3): Likewise.
>> (vst1_p16_x3): Likewise.
>> (vst1_s32_x3): Likewise.
>> (vst1_u8_x3): Likewise.
>> (vst1_u16_x3): Likewise.
>> (vst1_u32_x3): Likewise.
>> (vst1_f16_x3): Likewise.
>> (vst1_f32_x3): Likewise.
>> (vst1_p64_x3): Likewise.
>> (vst1q_s8_x3): Likewise.
>> (vst1q_p8_x3): Likewise.
>> (vst1q_s16_x3): Likewise.
>> (vst1q_p16_x3): Likewise.
>> (vst1q_s32_x3): Likewise.
>> (vst1q_s64_x3): Likewise.
>> (vst1q_u8_x3): Likewise.
>> (vst1q_u16_x3): Likewise.
>> (vst1q_u32_x3): Likewise.
>> (vst1q_u64_x3): Likewise.
>> (vst1q_f16_x3): Likewise.
>> (vst1q_f32_x3): Likewise.
>> (vst1q_f64_x3): Likewise.
>> (vst1q_p64_x3): Likewise.
>
> Hi,
> I'm not a maintainer, but I suspect you should add some tests.
>
> Christophe



-- 
- Thanks and regards,
  Sameera D.
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index b383f24..2fd072a 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -445,6 +445,15 @@
   BUILTIN_VALL_F16 (STORE1, st1, 0)
   VAR1(STORE1P, st1, 0, v2di)
 
+  /* Implemented by aarch64_ld1x3.  */
+  BUILTIN_VALLDIF (LOADSTRUCT, ld1x3, 0)
+
+  /* Implemented by aarch64_st1x2.  */
+  BUILTIN_VALLDIF (STORESTRUCT, st1x2, 0)
+
+  /* Implemented by aarch64_st1x3.  */
+  BUILTIN_VALLDIF (STORESTRUCT, st1x3, 0)
+
   /* Implemented by fma4.  */
   BUILTIN_VHSDF (TERNOP, fma, 4)
   VAR1 (TERNOP, fma, 4, hf)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 3d1f6a0..e197a67 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -5047,6 +5047,70 @@
 }
 })
 
+
+

Re: PR libstdc++/83860 avoid dangling references in valarray closure types

2018-04-06 Thread Jonathan Wakely

On 6 April 2018 at 09:50, Marc Glisse wrote:
> On Fri, 6 Apr 2018, Jonathan Wakely wrote:
>
>> This attempts to solve some of the problems when mixing std::valarray
>> operations and 'auto', by storing nested closure objects as values
>> instead of references. This means we don't end up with dangling
>> references to temporary closures that have already been destroyed.
>>
>> This makes the closure objects introduced by the library less
>> error-prone, but it's still possible to write incorrect code by using
>> temporary valarrays, e.g.
>>
>> std::valarray f();
>> auto x = f() * 2;
>> std::valarray y = x;
>>
>> Here the closure object 'x' has a dangling reference to the temporary
>> returned by f(). It might be possible to do something about this by
>> overloading the operators for valarray rvalues and moving the valarray
>> into the closure, instead of holding a const-reference. I don't plan
>> to work on that.
>>
>> Performance seems to be unaffected by this patch, although I didn't
>> test that very thoroughly.
>>
>> Strictly speaking this is an ABI change as it changes the size and
>> layout of the closure types like _BinClos etc. so that their _M_expr
>> member is at least two words, not just one for a reference. Those
>> closure types should never appear in API boundaries or as class
>> members (if anybody was doing that by using 'auto' it wouldn't have
>> worked correctly anyway) so I think we can just change them, without
>> renaming the types or moving them into an inline namespace so they
>> mangle differently. Does anybody disagree?
>
>
> When I was looking into doing something similar for GMP, I had decided to
> rename the class. Some inline functions have incompatible behavior before
> and after the change, and if they are not actually inlined and you mix the 2
> types of objects (through a library for instance) without a lot of care on
> symbol visibility, a single version will be used for both.
>
> If you think that's too contrived a scenario, then renaming may not be
> needed.

Maybe that's safest. We can just enclose them in namespace __detail or
__valarray and then add using-declarations to add them back to
namespace std.

>> The PR is marked as a regression because the example in the PR used to
>> "work" with older releases. That's probably only because they didn't
>> optimize as aggressively and so the stack space of the expired
>> temporaries wasn't reused (ASan still shows the bug was there in older
>> releases even if it doesn't crash). As a regression this should be
>> backported, but the layout changes make me pause a little when
>> considering making the change on stable release branches.
>
>
> I wouldn't count it as a regression.

OK thanks for the comments. I think this can probably wait for stage 1.

Re: [PATCH][GCC][mid-end] Fix PR85123 incorrect copies

2018-04-06 Thread Richard Biener

On Fri, 6 Apr 2018, Alan Modra wrote:

> On Thu, Apr 05, 2018 at 01:29:06PM +0100, Tamar Christina wrote:
> > diff --git a/gcc/expr.c b/gcc/expr.c
> > index 
> > 00660293f72e5441a6421a280b04c57fca2922b8..7daeb8c91d758edf0b3dc37f6927380b6f3df877
> >  100644
> > --- a/gcc/expr.c
> > +++ b/gcc/expr.c
> > @@ -2749,7 +2749,7 @@ copy_blkmode_to_reg (machine_mode mode_in, tree src)
> >  {
> >int i, n_regs;
> >unsigned HOST_WIDE_INT bitpos, xbitpos, padding_correction = 0, bytes;
> > -  unsigned int bitsize;
> > +   unsigned int bitsize = 0;
> >rtx *dst_words, dst, x, src_word = NULL_RTX, dst_word = NULL_RTX;
> >/* No current ABI uses variable-sized modes to pass a BLKmnode type.  */
> >fixed_size_mode mode = as_a  (mode_in);
> > @@ -2782,7 +2782,7 @@ copy_blkmode_to_reg (machine_mode mode_in, tree src)
> >  
> >n_regs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
> >dst_words = XALLOCAVEC (rtx, n_regs);
> > -  bitsize = BITS_PER_WORD;
> > +
> >if (targetm.slow_unaligned_access (word_mode, TYPE_ALIGN (TREE_TYPE 
> > (src
> >  bitsize = MIN (TYPE_ALIGN (TREE_TYPE (src)), BITS_PER_WORD);
> >  
> 
> You calculate bitsize here, then override it in the loop?  Doesn't
> that mean strict align targets will use mis-aligned loads and stores?
> 
> > @@ -2791,6 +2791,17 @@ copy_blkmode_to_reg (machine_mode mode_in, tree src)
> > bitpos < bytes * BITS_PER_UNIT;
> > bitpos += bitsize, xbitpos += bitsize)
> >  {
> > +  /* Find the largest integer mode that can be used to copy all or as
> > +many bits as possible of the structure.  */
> > +  opt_scalar_int_mode mode_iter;
> > +  FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
> > +  if (GET_MODE_BITSIZE (mode_iter.require ())
> > +   <= ((bytes * BITS_PER_UNIT) - bitpos)
> > + && GET_MODE_BITSIZE (mode_iter.require ()) <= BITS_PER_WORD)
> > +   bitsize = GET_MODE_BITSIZE (mode_iter.require ());
> > +  else
> > +   break;
> > +
> 
> This isn't correct.  Consider a 6 byte struct on a 4 byte word, 8 bit
> byte, big-endian target when targetm.calls.return_in_msb is false.
> 
> In this scenario, copy_blkmode_to_reg should return two registers, set
> as if they had been loaded from two words in memory laid out as
> follows (left to right increasing byte addresses):
>  ___ ___
> |  0  |  0  |  s0 |  s1 |   |  s2 |  s3 |  s4 |  s5 |
>  ~~~ ~~~
> 
> So we will have xbitpos=16 first time around the loop.  That means
> your new code will attempt to store 32 bits into a bit-field starting
> at bit 16 in the first 32-bit register, and of course fail.
> 
> This scenario used to be handled correctly, at least when the struct
> wasn't over-aligned.

I wonder if it's best to revert the original regression causing patch
and look for a proper solution in the GCC 9 timeframe?

Thanks,
Richard.

Re: [PATCH] Mitigate PR85222 a bit

2018-04-06 Thread Jonathan Wakely

On 6 April 2018 at 10:39, Richard Biener wrote:
>
> The following allows people configuring the gcc-4 compatible ABI
> as the default ABI to retain compatibility with old programs
> catching ios_base::failure by matching the abi version thrown
> to the configured default ABI.
>
> This doesn't really fix the PR but it makes behavior between
> the dual-ABI with default gcc-4 compatible consistent with that
> of the non-dual-ABI which is what I had expected.
>
> Whether an ABI break is really desired for the case of a c++11
> default ABI is still questionable and any programs that differ
> from the default ABI choice will now be broken (compared to
> those differing from c++11).
>
> Bootstrapped on x86_64-unknown-linux-gnu, ok for trunk?  Ok for
> the GCC 7 branch?

Thinking about this further, I seem to recall we decided *not* to do
this. Currently the --with-default-libstdcxx-abi flag only affects the
default value of a macro in the headers, it doesn't affect the library
ABI. The header macro can be overriden per translation unit, but the
library ABI is always the same (it always throws the new ios::failure
type). With this change the option would affect the library ABI and so
--with-default-libstdcxx-abi would cause a new effect on libstdc++.so

> I'm not sure if we want to revert r245167 after this?  I'm
> re-running the testsuite with a gcc4-compatible ABI right now.
> At least with a "real" fix we should be able to run the affected
> tests twice, once with the new and once with the old ABI.
>
> Thanks,
> Richard.
>
> 2018-04-06  Richard Biener  
>
> PR libstdc++/85222
> * src/c++11/ios.cc: Remove hard define of _GLIBCXX_USE_CXX11_ABI to 1.
> Instead use the configured default ABI to decide the ABI version
> of ios_base::failure thrown by __throw_ios_failure.
>
> Index: libstdc++-v3/src/c++11/ios.cc
> ===
> --- libstdc++-v3/src/c++11/ios.cc   (revision 258812)
> +++ libstdc++-v3/src/c++11/ios.cc   (working copy)
> @@ -26,9 +26,8 @@
>  // ISO C++ 14882: 27.4  Iostreams base classes
>  //
>
> -// Determines the version of ios_base::failure thrown by __throw_ios_failure.
> -// If !_GLIBCXX_USE_DUAL_ABI this will get undefined automatically.
> -#define _GLIBCXX_USE_CXX11_ABI 1
> +// The ABI version of ios_base::failure thrown by __throw_ios_failure
> +// is determined by the default ABI version choosed at configure time
>
>  #include 
>  #include

Re: [PATCH] Mitigate PR85222 a bit

2018-04-06 Thread Richard Biener

On Fri, 6 Apr 2018, Jonathan Wakely wrote:

> On 6 April 2018 at 10:39, Richard Biener wrote:
> >
> > The following allows people configuring the gcc-4 compatible ABI
> > as the default ABI to retain compatibility with old programs
> > catching ios_base::failure by matching the abi version thrown
> > to the configured default ABI.
> >
> > This doesn't really fix the PR but it makes behavior between
> > the dual-ABI with default gcc-4 compatible consistent with that
> > of the non-dual-ABI which is what I had expected.
> >
> > Whether an ABI break is really desired for the case of a c++11
> > default ABI is still questionable and any programs that differ
> > from the default ABI choice will now be broken (compared to
> > those differing from c++11).
> >
> > Bootstrapped on x86_64-unknown-linux-gnu, ok for trunk?  Ok for
> > the GCC 7 branch?
> 
> Thinking about this further, I seem to recall we decided *not* to do
> this. Currently the --with-default-libstdcxx-abi flag only affects the
> default value of a macro in the headers, it doesn't affect the library
> ABI. The header macro can be overriden per translation unit, but the
> library ABI is always the same (it always throws the new ios::failure
> type). With this change the option would affect the library ABI and so
> --with-default-libstdcxx-abi would cause a new effect on libstdc++.so

But --disable-libstdcxx-dual-abi does so as well.  Currently the
behavior with that compared to 
--with-default-libstdcxx-abi=gcc4-compatible is inconsistent with
respect to whether the following testcase works or not (note it
doesn't explicitely specify the ABI version to use)

#include 
#include 
using namespace std;
int main ()
{
  std::ifstream pstats;
  pstats.exceptions(ifstream::failbit | ifstream::badbit | 
ifstream::eofbit);
  try {
  printf("\n Opening file : /proc/0/stat ");
  pstats.open("/proc/0/stat");
  }
  catch(ifstream::failure e)
{
  printf("\n!!Caught ifstream exception!!\n");
  if(pstats.is_open()) {
  pstats.close();
  }
}
  return 0;
}

I call that a bug.  Yes, we talked about ways to eventually fix that.
But is anybody working on that?

I think the current default behavior with 
--with-default-libstdcxx-abi=gcc4-compatible is undesirable.

Thanks,
Richard.

> > I'm not sure if we want to revert r245167 after this?  I'm
> > re-running the testsuite with a gcc4-compatible ABI right now.
> > At least with a "real" fix we should be able to run the affected
> > tests twice, once with the new and once with the old ABI.
> >
> > Thanks,
> > Richard.
> >
> > 2018-04-06  Richard Biener  
> >
> > PR libstdc++/85222
> > * src/c++11/ios.cc: Remove hard define of _GLIBCXX_USE_CXX11_ABI to 
> > 1.
> > Instead use the configured default ABI to decide the ABI version
> > of ios_base::failure thrown by __throw_ios_failure.
> >
> > Index: libstdc++-v3/src/c++11/ios.cc
> > ===
> > --- libstdc++-v3/src/c++11/ios.cc   (revision 258812)
> > +++ libstdc++-v3/src/c++11/ios.cc   (working copy)
> > @@ -26,9 +26,8 @@
> >  // ISO C++ 14882: 27.4  Iostreams base classes
> >  //
> >
> > -// Determines the version of ios_base::failure thrown by 
> > __throw_ios_failure.
> > -// If !_GLIBCXX_USE_DUAL_ABI this will get undefined automatically.
> > -#define _GLIBCXX_USE_CXX11_ABI 1
> > +// The ABI version of ios_base::failure thrown by __throw_ios_failure
> > +// is determined by the default ABI version choosed at configure time
> >
> >  #include 
> >  #include 
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)

Re: [PATCH][GCC][mid-end] Fix PR85123 incorrect copies

2018-04-06 Thread Eric Botcazou

> I wonder if it's best to revert the original regression causing patch
> and look for a proper solution in the GCC 9 timeframe?

Probably the best course of action indeed, as changes in this area need to be 
thoroughly tested on various targets to cover all the cases.

-- 
Eric Botcazou

Re: [PATCH][GCC][mid-end] Fix PR85123 incorrect copies

2018-04-06 Thread Richard Biener

On Fri, 6 Apr 2018, Eric Botcazou wrote:

> > I wonder if it's best to revert the original regression causing patch
> > and look for a proper solution in the GCC 9 timeframe?
> 
> Probably the best course of action indeed, as changes in this area need to be 
> thoroughly tested on various targets to cover all the cases.

So please go ahead with that now.  If we wind up with an obviously
correct solution we can reconsider later, backporting to GCC 8.2+ is
possible as well.

Thanks,
Richard.

[PATCH] Fix PR85244

2018-04-06 Thread Richard Biener


The following fixes PR85244 where we fail to handle a flex-array
reference as such and constrain it with the appearant size of an external
declaration.  The fix is to handle this case much like the unconstrained
common one and to not regress some cases the patch adjusts the handling
of flex-array detection when visiting component references.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2018-04-06  Richard Biener  

PR middle-end/85244
* tree-dfa.c (get_ref_base_and_extent): Reset seen_variable_array_ref
after seeing a component reference with an adjacent field.  Treat
refs to arrays at struct end of external decls similar to
refs to unconstrained commons.

* gcc.dg/torture/pr85244-1.c: New testcase.
* gcc.dg/torture/pr85244-2.c: Likewise.

Index: gcc/tree-dfa.c
===
--- gcc/tree-dfa.c  (revision 259082)
+++ gcc/tree-dfa.c  (working copy)
@@ -438,7 +438,7 @@ get_ref_base_and_extent (tree exp, poly_
   referenced the last field of a struct or a union member
   then we have to adjust maxsize by the padding at the end
   of our field.  */
-   if (seen_variable_array_ref && known_size_p (maxsize))
+   if (seen_variable_array_ref)
  {
tree stype = TREE_TYPE (TREE_OPERAND (exp, 0));
tree next = DECL_CHAIN (field);
@@ -454,7 +454,7 @@ get_ref_base_and_extent (tree exp, poly_
|| ssize == NULL
|| !poly_int_tree_p (ssize))
  maxsize = -1;
-   else
+   else if (known_size_p (maxsize))
  {
poly_offset_int tem
  = (wi::to_poly_offset (ssize)
@@ -464,6 +464,11 @@ get_ref_base_and_extent (tree exp, poly_
maxsize += tem;
  }
  }
+   /* An component ref with an adjacent field up in the
+  structure hierarchy constrains the size of any variable
+  array ref lower in the access hierarchy.  */
+   else
+ seen_variable_array_ref = false;
  }
  }
else
@@ -622,7 +627,9 @@ get_ref_base_and_extent (tree exp, poly_
 
   if (DECL_P (exp))
 {
-  if (flag_unconstrained_commons && VAR_P (exp) && DECL_COMMON (exp))
+  if (VAR_P (exp)
+ && ((flag_unconstrained_commons && DECL_COMMON (exp))
+ || (DECL_EXTERNAL (exp) && seen_variable_array_ref)))
{
  tree sz_tree = TYPE_SIZE (TREE_TYPE (exp));
  /* If size is unknown, or we have read to the end, assume there
Index: gcc/testsuite/gcc.dg/torture/pr85244-1.c
===
--- gcc/testsuite/gcc.dg/torture/pr85244-1.c(nonexistent)
+++ gcc/testsuite/gcc.dg/torture/pr85244-1.c(working copy)
@@ -0,0 +1,19 @@
+/* { dg-do run } */
+/* { dg-additional-sources "pr85244-2.c" } */
+
+struct s {
+ long a;
+ int b;
+ int tab[];
+};
+
+extern const struct s val;
+extern int idx;
+extern void abort (void);
+
+int main()
+{
+  if (val.tab[0] != 42 || val.tab[1] != 1337 || val.tab[idx] != 1337)
+abort ();
+  return 0;
+}
Index: gcc/testsuite/gcc.dg/torture/pr85244-2.c
===
--- gcc/testsuite/gcc.dg/torture/pr85244-2.c(nonexistent)
+++ gcc/testsuite/gcc.dg/torture/pr85244-2.c(working copy)
@@ -0,0 +1,8 @@
+struct s {
+long a;
+int b;
+int tab[];
+};
+
+int idx = 1;
+const struct s val = { 0, 0, { 42, 1337 } };

Re: [PATCH] Mitigate PR85222 a bit

2018-04-06 Thread Jonathan Wakely

On 6 April 2018 at 12:22, Richard Biener wrote:
> On Fri, 6 Apr 2018, Jonathan Wakely wrote:
>
>> On 6 April 2018 at 10:39, Richard Biener wrote:
>> >
>> > The following allows people configuring the gcc-4 compatible ABI
>> > as the default ABI to retain compatibility with old programs
>> > catching ios_base::failure by matching the abi version thrown
>> > to the configured default ABI.
>> >
>> > This doesn't really fix the PR but it makes behavior between
>> > the dual-ABI with default gcc-4 compatible consistent with that
>> > of the non-dual-ABI which is what I had expected.
>> >
>> > Whether an ABI break is really desired for the case of a c++11
>> > default ABI is still questionable and any programs that differ
>> > from the default ABI choice will now be broken (compared to
>> > those differing from c++11).
>> >
>> > Bootstrapped on x86_64-unknown-linux-gnu, ok for trunk?  Ok for
>> > the GCC 7 branch?
>>
>> Thinking about this further, I seem to recall we decided *not* to do
>> this. Currently the --with-default-libstdcxx-abi flag only affects the
>> default value of a macro in the headers, it doesn't affect the library
>> ABI. The header macro can be overriden per translation unit, but the
>> library ABI is always the same (it always throws the new ios::failure
>> type). With this change the option would affect the library ABI and so
>> --with-default-libstdcxx-abi would cause a new effect on libstdc++.so
>
> But --disable-libstdcxx-dual-abi does so as well.

Right, but that one (obviously) alters the library ABI, by disabling
all the duplicated symbols.

Currently --with-default-libstdcxx-abi doesn't alter the ABI of the
shared lib when the dual-abi is enabled, just whether the initial
value of the macro is 0 or 1. All the symbols in the library, and the
type of exception thrown, are the same for every dual-abi build of the
library.

Sure, we _can_ change that, but should we?

We could add yet another option, so you can set the initial value of
the macro to 0 but still have it throw the new exception type, or vice
versa. I don't really like that option either.

>  Currently the
> behavior with that compared to
> --with-default-libstdcxx-abi=gcc4-compatible is inconsistent with
> respect to whether the following testcase works or not (note it
> doesn't explicitely specify the ABI version to use)
>
> #include 
> #include 
> using namespace std;
> int main ()
> {
>   std::ifstream pstats;
>   pstats.exceptions(ifstream::failbit | ifstream::badbit |
> ifstream::eofbit);
>   try {
>   printf("\n Opening file : /proc/0/stat ");
>   pstats.open("/proc/0/stat");
>   }
>   catch(ifstream::failure e)
> {
>   printf("\n!!Caught ifstream exception!!\n");
>   if(pstats.is_open()) {
>   pstats.close();
>   }
> }
>   return 0;
> }
>
> I call that a bug.  Yes, we talked about ways to eventually fix that.
> But is anybody working on that?

Not for GCC 8.1, but then we already shipped 7.x with the current
behaviour anyway.

Re: [PATCH] Mitigate PR85222 a bit

2018-04-06 Thread Jonathan Wakely

On 6 April 2018 at 12:48, Jonathan Wakely wrote:
> Currently --with-default-libstdcxx-abi doesn't alter the ABI of the
> shared lib when the dual-abi is enabled, just whether the initial
> value of the macro is 0 or 1. All the symbols in the library, and the
> type of exception thrown, are the same for every dual-abi build of the
> library.

I mean every dual-abi build for a given release, of course. Because
the type of exception thrown changed between gcc 6 and gcc 7, so maybe
my position that the option doesn't affect the ABI of the .so is
untenable, given that it changed independent of that option :-\

Re: [PATCH] Mitigate PR85222 a bit

2018-04-06 Thread Richard Biener

On Fri, 6 Apr 2018, Jonathan Wakely wrote:

> On 6 April 2018 at 12:48, Jonathan Wakely wrote:
> > Currently --with-default-libstdcxx-abi doesn't alter the ABI of the
> > shared lib when the dual-abi is enabled, just whether the initial
> > value of the macro is 0 or 1. All the symbols in the library, and the
> > type of exception thrown, are the same for every dual-abi build of the
> > library.
> 
> I mean every dual-abi build for a given release, of course. Because
> the type of exception thrown changed between gcc 6 and gcc 7, so maybe
> my position that the option doesn't affect the ABI of the .so is
> untenable, given that it changed independent of that option :-\

All I can say is that for the SUSE distributions that use the old
ABI by default but still provide GCC 7+ as optional choice and thus
end up using the newest compiler runtime I'm taking the pragmatic
approach of making --with-default-libstdcxx-abi=gcc4-compatible
throw the old ABI ios_base::failure class via said patch.

Currently there's no way to get the old ABI thrown besides
disabling dual-ABI but that removes all the symbols that have
been present since those distributions offered GCC 5 so isn't
a good option.  Which means the ABI break between GCC 5/6 and GCC 7
has to be reverted here.

Of course I think the ABI break is even intolerable when the
new ABI is the default.  Ways to fix that have been discussed in
the PR but a fix is of course taking time to implement and verify.

Richard.

RE: [PATCH][GCC][mid-end] Fix PR85123 incorrect copies

2018-04-06 Thread Tamar Christina



> -Original Message-
> From: Richard Biener 
> Sent: Friday, April 6, 2018 12:36
> To: Eric Botcazou 
> Cc: gcc-patches@gcc.gnu.org; Alan Modra ; Tamar
> Christina ; nd ;
> l...@redhat.com; i...@airs.com; berg...@vnet.ibm.com
> Subject: Re: [PATCH][GCC][mid-end] Fix PR85123 incorrect copies
> 
> On Fri, 6 Apr 2018, Eric Botcazou wrote:
> 
> > > I wonder if it's best to revert the original regression causing
> > > patch and look for a proper solution in the GCC 9 timeframe?
> >
> > Probably the best course of action indeed, as changes in this area
> > need to be thoroughly tested on various targets to cover all the cases.
> 
> So please go ahead with that now.  If we wind up with an obviously correct
> solution we can reconsider later, backporting to GCC 8.2+ is possible as well.

I was going to suggest a more conservative approach of only doing it if there 
is no
Padding correction. Since it seems to me the only targets that have an issue 
are those
which use padding corrections.

But I'll revert the patch then.

> 
> Thanks,
> Richard.

RE: [PATCH][GCC][mid-end] Fix PR85123 incorrect copies

2018-04-06 Thread Tamar Christina

Patch has been reverted as r259169 as requested.

Sorry for the breakage,
Tamar.

> -Original Message-
> From: Richard Biener 
> Sent: Friday, April 6, 2018 12:36
> To: Eric Botcazou 
> Cc: gcc-patches@gcc.gnu.org; Alan Modra ; Tamar
> Christina ; nd ;
> l...@redhat.com; i...@airs.com; berg...@vnet.ibm.com
> Subject: Re: [PATCH][GCC][mid-end] Fix PR85123 incorrect copies
> 
> On Fri, 6 Apr 2018, Eric Botcazou wrote:
> 
> > > I wonder if it's best to revert the original regression causing
> > > patch and look for a proper solution in the GCC 9 timeframe?
> >
> > Probably the best course of action indeed, as changes in this area
> > need to be thoroughly tested on various targets to cover all the cases.
> 
> So please go ahead with that now.  If we wind up with an obviously correct
> solution we can reconsider later, backporting to GCC 8.2+ is possible as well.
> 
> Thanks,
> Richard.

[PATCH] Handle empty infinite loops in OpenACC for PR84955

2018-04-06 Thread Cesar Philippidis

As shown in PR84955, GCC has problems empty infinite loops of the form

  #pragma acc loop tile (2, 2)
  for (...)
for (...)
{
  for (...)
/* do nothing */
;
}

I also observed that it generates bad code for the following loop

  #pragma acc loop
  for (...)
  {
for (...)
  /* do nothing */
  ;
  }

There are two problems here. First, for the tiled loop, as
expand_oacc_for splits the first basic block of the loop body to add a
branch to perform the tiling control logic, it does not consider fact
that the body may not have an exit when it contains an infinite loop.
This situation can occur when region->cont is NULL. The fix here was to
add a dummy false edge to exit_bb.

The second problem involves fixing up the CFG. Unlike OpenMP which
defers a lot of the scheduling control to libgomp, OpenACC makes heavy
use of calls to internal functions. The problem here is that the
fixup_cfg pass was largely ignoring calls to internal functions when it
decides to set PROP_cleanup_cfg. When the CFG isn't cleaned up, the LTO
streamer will report that there are fewer loops than there actually are
and that causes an ICE because the empty infinite loop is never
accounted for. This patch resolves this issue by relaxing the fixup_cfg
pass to treat all functions calls, including those to internal
functions, the same. An alternative approach to resolve this issue would
have been to teach ipa_write_summaries to check if the loop structure
needs to be fixed up and call cleanup_cfg as necessary. But I wanted to
keep the OMP and OACC code paths similar, so I took the former approach.

I regression tested this patch on x86_64-linux using nvptx offloading.
Is this patch OK for trunk and GCC 7 (and probably GCC 6).

Thanks,
Cesar
Fix PR84955

2018-04-06  Cesar Philippidis  

	PR middle-end/84955

	gcc/
	* cfgloop.c (flow_loops_find): Add assert.
	* omp-expand.c (expand_oacc_for): Add dummy false branch for
tiled basic blocks without omp continue statements.
	* tree-cfg.c (execute_fixup_cfg): Handle calls to internal
functions like regular functions.

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/pr84955.c: New test.
	* testsuite/libgomp.oacc-fortran/pr84955.f90: New test.


diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c
index 8af793c6015..6e68639452c 100644
--- a/gcc/cfgloop.c
+++ b/gcc/cfgloop.c
@@ -462,6 +462,9 @@ flow_loops_find (struct loops *loops)
 	{
 	  struct loop *loop;
 
+	  if (!from_scratch)
+	gcc_assert (header->loop_father != NULL);
+
 	  /* The current active loop tree has valid loop-fathers for
 	 header blocks.  */
 	  if (!from_scratch
diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c
index bb204906ea6..1c7b68fbd8c 100644
--- a/gcc/omp-expand.c
+++ b/gcc/omp-expand.c
@@ -5439,6 +5439,13 @@ expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
 
 	  split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
 
+	  /* Add a dummy exit for the tiled block when cont_bb is missing.  */
+	  if (cont_bb == NULL)
+	{
+	  edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
+	  e->probability = profile_probability::even ();
+	}
+
 	  /* Initialize the user's loop vars.  */
 	  gsi = gsi_start_bb (elem_body_bb);
 	  expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index 9485f73f341..cb676d78128 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -9586,10 +9586,7 @@ execute_fixup_cfg (void)
   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
 	{
 	  gimple *stmt = gsi_stmt (gsi);
-	  tree decl = is_gimple_call (stmt)
-		  ? gimple_call_fndecl (stmt)
-		  : NULL;
-	  if (decl)
+	  if (is_gimple_call (stmt))
 	{
 	  int flags = gimple_call_flags (stmt);
 	  if (flags & (ECF_CONST | ECF_PURE | ECF_LOOPING_CONST_OR_PURE))
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr84955.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr84955.c
new file mode 100644
index 000..5910b57b68d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr84955.c
@@ -0,0 +1,20 @@
+/* { dg-do compile }  */
+
+int
+main ()
+{
+  int i, j;
+
+#pragma acc parallel loop tile(2,3)
+  for (i = 1; i < 10; i++)
+for (j = 1; j < 10; j++)
+  for (;;)
+	;
+
+#pragma acc parallel loop
+  for (i = 1; i < 10; i++)
+for (;;)
+  ;
+
+  return i + j;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pr84955.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pr84955.f90
new file mode 100644
index 000..878d8a89f41
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/pr84955.f90
@@ -0,0 +1,20 @@
+! { dg-do compile }
+
+subroutine s
+   integer :: i, j
+   !$acc parallel loop tile(2,3)
+   do i = 1, 10
+  do j = 1, 10
+ do
+ end do
+  end do
+   end do
+  !$acc end parallel loop
+
+  !$acc parallel loop
+   do i = 1, 10
+  do
+  end do
+   end do
+  !$acc end parallel loop
+end subroutine s

C++ PATCH for c++/85240, LTO ICE with using of undeduced auto

2018-04-06 Thread Jason Merrill

poplevel already tries to keep undeduced autos out of BLOCK_VARS, but
in this testcase cp_genericize_r was adding one in later on.

Tested x86_64-pc-linux-gnu, applying to trunk.
commit 61f3456c563fd7b3354a688f40c1afbfa8423442
Author: Jason Merrill 
Date:   Fri Apr 6 07:45:01 2018 -0400

PR c++/85240 - LTO ICE with using of undeduced auto fn.

* cp-gimplify.c (cp_genericize_r): Discard using of undeduced auto.

diff --git a/gcc/cp/cp-gimplify.c b/gcc/cp/cp-gimplify.c
index fd0c37f9be2..fb0aea3e0c7 100644
--- a/gcc/cp/cp-gimplify.c
+++ b/gcc/cp/cp-gimplify.c
@@ -1294,16 +1294,20 @@ cp_genericize_r (tree *stmt_p, int *walk_subtrees, void *data)
 	  }
 	if (block)
 	  {
-	tree using_directive;
-	gcc_assert (TREE_OPERAND (stmt, 0));
+	tree decl = TREE_OPERAND (stmt, 0);
+	gcc_assert (decl);
 
-	using_directive = make_node (IMPORTED_DECL);
-	TREE_TYPE (using_directive) = void_type_node;
+	if (undeduced_auto_decl (decl))
+	  /* Omit from the GENERIC, the back-end can't handle it.  */;
+	else
+	  {
+		tree using_directive = make_node (IMPORTED_DECL);
+		TREE_TYPE (using_directive) = void_type_node;
 
-	IMPORTED_DECL_ASSOCIATED_DECL (using_directive)
-	  = TREE_OPERAND (stmt, 0);
-	DECL_CHAIN (using_directive) = BLOCK_VARS (block);
-	BLOCK_VARS (block) = using_directive;
+		IMPORTED_DECL_ASSOCIATED_DECL (using_directive) = decl;
+		DECL_CHAIN (using_directive) = BLOCK_VARS (block);
+		BLOCK_VARS (block) = using_directive;
+	  }
 	  }
 	/* The USING_STMT won't appear in GENERIC.  */
 	*stmt_p = build1 (NOP_EXPR, void_type_node, integer_zero_node);
diff --git a/gcc/testsuite/g++.dg/cpp1y/auto-fn51.C b/gcc/testsuite/g++.dg/cpp1y/auto-fn51.C
new file mode 100644
index 000..7e4f488458b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/auto-fn51.C
@@ -0,0 +1,9 @@
+// PR c++/85240
+// { dg-do compile { target c++14 } }
+
+auto foo();
+
+void bar()
+{
+  using ::foo;
+}

C++ PATCH for c++/85242, ICE with class defn in template parm

2018-04-06 Thread Jason Merrill

Here, we were trying to start defining a template in the middle of the
template parameter list for another template.  Adjusting the
PROCESSING_REAL_TEMPLATE_DECL_P macro to exclude this situation seems
appropriate.

Tested x86_64-pc-linux-gnu, applying to trunk.
commit f7bcdc914de03d87a99bc7ebd85fefb031028ef0
Author: Jason Merrill 
Date:   Fri Apr 6 09:07:23 2018 -0400

PR c++/85242 - ICE with class definition in template parm.

* cp-tree.h (PROCESSING_REAL_TEMPLATE_DECL_P): False if
processing_template_parmlist.

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index dbe34c096f0..204791e51cf 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -4719,7 +4719,8 @@ more_aggr_init_expr_args_p (const aggr_init_expr_arg_iterator *iter)
entity with its own template parameter list, and which is not a
full specialization.  */
 #define PROCESSING_REAL_TEMPLATE_DECL_P() \
-  (processing_template_decl > template_class_depth (current_scope ()))
+  (!processing_template_parmlist \
+   && processing_template_decl > template_class_depth (current_scope ()))
 
 /* Nonzero if this VAR_DECL or FUNCTION_DECL has already been
instantiated, i.e. its definition has been generated from the
diff --git a/gcc/testsuite/g++.dg/template/error58.C b/gcc/testsuite/g++.dg/template/error58.C
new file mode 100644
index 000..cede1c94887
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/error58.C
@@ -0,0 +1,8 @@
+// PR c++/85242
+
+namespace N
+{
+  struct A {};
+}
+
+template void foo(); // { dg-error "" }

Re: [PATCH v2] C++: suggest missing headers for implicit use of "std" (PR c++/85021)

2018-04-06 Thread Jason Merrill

OK.

On Thu, Apr 5, 2018 at 7:40 PM, David Malcolm  wrote:
> On Thu, 2018-03-29 at 15:25 -0400, Jason Merrill wrote:
>> On Thu, Mar 22, 2018 at 7:44 PM, David Malcolm 
>> wrote:
>> > We provide fix-it hints for the most common "std" names when an
>> > explicit
>> > "std::" prefix is present, however we don't yet provide fix-it
>> > hints for
>> > this implicit case:
>> >
>> >   using namespace std;
>> >   void f() {  cout << "test"; }
>> >
>> > for which we emit:
>> >
>> >   t.cc: In function 'void f()':
>> >   t.cc:2:13: error: 'cout' was not declared in this scope
>> >void f() {  cout << "test"; }
>> >^~~~
>> >
>> > This patch detects if a "using namespace std;" directive is present
>> > in the current namespace, and if so, offers a suggestion for
>> > unrecognized names that are in our list of common "std" names:
>> >
>> >   t.cc: In function 'void f()':
>> >   t.cc:2:13: error: 'cout' was not declared in this scope
>> >void f() {  cout << "test"; }
>> >^~~~
>> >   t.cc:2:13: note: 'std::cout' is defined in header '';
>> > did you forget to '#include '?
>> >   +#include 
>> >using namespace std;
>> >void f() {  cout << "test"; }
>> >^~~~
>> >
>> > Is there a better way to test for the using directive?
>> >
>> > Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
>> >
>> > OK for trunk?
>> >
>> > gcc/cp/ChangeLog:
>> > PR c++/85021
>> > * name-lookup.c (has_using_namespace_std_directive_p): New
>> > function.
>> > (suggest_alternatives_for): Simplify if/else logic using
>> > early
>> > returns.  If no candidates were found, and there's a
>> > "using namespace std;" directive, call
>> > maybe_suggest_missing_std_header.
>> > (maybe_suggest_missing_header): Split later part of the
>> > function
>> > into..
>> > (maybe_suggest_missing_std_header): New.
>> >
>> > gcc/testsuite/ChangeLog:
>> > PR c++/85021
>> > * g++.dg/lookup/missing-std-include-7.C: New test.
>> > ---
>> >  gcc/cp/name-lookup.c   | 68
>> > +-
>> >  .../g++.dg/lookup/missing-std-include-7.C  | 16 +
>> >  2 files changed, 70 insertions(+), 14 deletions(-)
>> >  create mode 100644 gcc/testsuite/g++.dg/lookup/missing-std-
>> > include-7.C
>> >
>> > diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c
>> > index 061729a..4eb980e 100644
>> > --- a/gcc/cp/name-lookup.c
>> > +++ b/gcc/cp/name-lookup.c
>> > @@ -41,6 +41,7 @@ static cxx_binding *cxx_binding_make (tree value,
>> > tree type);
>> >  static cp_binding_level *innermost_nonclass_level (void);
>> >  static void set_identifier_type_value_with_scope (tree id, tree
>> > decl,
>> >   cp_binding_level
>> > *b);
>> > +static bool maybe_suggest_missing_std_header (location_t location,
>> > tree name);
>> >
>> >  /* Create an overload suitable for recording an artificial
>> > TYPE_DECL
>> > and another decl.  We use this machanism to implement the
>> > struct
>> > @@ -5327,6 +5328,23 @@ qualify_lookup (tree val, int flags)
>> >return true;
>> >  }
>> >
>> > +/* Is there a "using namespace std;" directive within the current
>> > +   namespace?  */
>> > +
>> > +static bool
>> > +has_using_namespace_std_directive_p ()
>> > +{
>> > +  vec *usings = DECL_NAMESPACE_USING
>> > (current_namespace);
>>
>> Checking in just the current namespace won't find a "using namespace
>> std" in an inner or outer scope; I think you want to add something to
>> name-lookup.c that iterates over the current enclosing scopes like
>> name_lookup::search_unqualified.  Nathan can probably be more
>> specific.
>>
>> Jason
>
> Thanks.
>
> Here's an updated version of the patch.
>
> Rather than just search in DECL_NAMESPACE_USING (current_namespace),
> this version mimics name_lookup::search_unqualified by searching for local
> using-directives in the current_binding_level until it reaches a sk_namespace,
> and then searching in current_namespace and its ancestors.
> (and builds out the test coverage accordingly)
>
> Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu; adds
> 57 PASS results to g++.sum.
>
> OK for trunk?
>
> gcc/cp/ChangeLog:
> PR c++/85021
> * name-lookup.c (using_directives_contain_std_p): New function.
> (has_using_namespace_std_directive_p): New function.
> (suggest_alternatives_for): Simplify if/else logic using early
> returns.  If no candidates were found, and there's a
> "using namespace std;" directive, call
> maybe_suggest_missing_std_header.
> (maybe_suggest_missing_header): Split later part of the function
> into..
> (maybe_suggest_missing_std_header): New.
>
> gcc/testsuite/ChangeLog:
> PR c++/85021
> * g++.dg/lookup/missing-std-include-7.C: New test.
> ---
>  gcc/cp/name-lookup.c

Re: [PATCH] Handle empty infinite loops in OpenACC for PR84955

2018-04-06 Thread Jakub Jelinek

On Fri, Apr 06, 2018 at 06:48:52AM -0700, Cesar Philippidis wrote:
> 2018-04-06  Cesar Philippidis  
> 
>   PR middle-end/84955
> 
>   gcc/
>   * cfgloop.c (flow_loops_find): Add assert.
>   * omp-expand.c (expand_oacc_for): Add dummy false branch for
> tiled basic blocks without omp continue statements.
>   * tree-cfg.c (execute_fixup_cfg): Handle calls to internal
> functions like regular functions.
> 
>   libgomp/
>   * testsuite/libgomp.oacc-c-c++-common/pr84955.c: New test.
>   * testsuite/libgomp.oacc-fortran/pr84955.f90: New test.

I'd like to defer the cfgloop.c and tree-cfg.c changes to Richard, just want to
mention that:

> --- a/gcc/tree-cfg.c
> +++ b/gcc/tree-cfg.c
> @@ -9586,10 +9586,7 @@ execute_fixup_cfg (void)
>for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
>   {
> gimple *stmt = gsi_stmt (gsi);
> -   tree decl = is_gimple_call (stmt)
> -   ? gimple_call_fndecl (stmt)
> -   : NULL;
> -   if (decl)
> +   if (is_gimple_call (stmt))

This change doesn't affect just internal functions, but also all indirect
calls through function pointers with const, pure or noreturn attributes.

> --- a/gcc/omp-expand.c
> +++ b/gcc/omp-expand.c
> @@ -5439,6 +5439,13 @@ expand_oacc_for (struct omp_region *region, struct 
> omp_for_data *fd)
>  
> split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
>  
> +   /* Add a dummy exit for the tiled block when cont_bb is missing.  */
> +   if (cont_bb == NULL)
> + {
> +   edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
> +   e->probability = profile_probability::even ();
> + }

I miss here updating of split->probability, if you make e even (the other edge
needs to have probability of 100%-the probability, i.e. even as well.

Jakub

[PATCH, GCC/ARM] Fix PR85261: ICE with FPSCR setter builtin

2018-04-06 Thread Thomas Preudhomme


Instruction pattern for setting the FPSCR expects the input value to be
in a register. However, __builtin_arm_set_fpscr expander does not ensure
that this is the case and as a result GCC ICEs when the builtin is
called with a constant literal.

This commit fixes the builtin to force the input value into a register.
It also remove the unneeded volatile in the existing fpscr test and
fixes the function prototype.

ChangeLog entries are as follows:

*** gcc/ChangeLog ***

2018-04-06  Thomas Preud'homme  

PR target/85261
* config/arm/arm-builtins.c (arm_expand_builtin): Force input operand
into register.

*** gcc/testsuite/ChangeLog ***

2018-04-06  Thomas Preud'homme  

PR target/85261
* gcc.target/arm/fpscr.c: Add call to __builtin_arm_set_fpscr with
literal value.  Expect 2 MCR instruction.  Fix function prototype.
Remove volatile keyword.

Testing: Built an arm-none-eabi GCC cross-compiler and testsuite shows
no regression.

Is this ok for stage4?

Best regards,

Thomas
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 8940d1f6311bccf86664ab2eaa938735eec595f6..e100d933a77c5de4a13cb961d1bff40f57f2ea80 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -2592,7 +2592,7 @@ arm_expand_builtin (tree exp,
 	  icode = CODE_FOR_set_fpscr;
 	  arg0 = CALL_EXPR_ARG (exp, 0);
 	  op0 = expand_normal (arg0);
-	  pat = GEN_FCN (icode) (op0);
+	  pat = GEN_FCN (icode) (force_reg (SImode, op0));
 	}
   emit_insn (pat);
   return target;
diff --git a/gcc/testsuite/gcc.target/arm/fpscr.c b/gcc/testsuite/gcc.target/arm/fpscr.c
index 7b4d71d72d8964f6da0d0604bf59aeb4a895df43..4c3eaf7fcf75ad8582071ecb110fd1e4976a3b24 100644
--- a/gcc/testsuite/gcc.target/arm/fpscr.c
+++ b/gcc/testsuite/gcc.target/arm/fpscr.c
@@ -6,11 +6,14 @@
 /* { dg-add-options arm_fp } */
 
 void
-test_fpscr ()
+test_fpscr (void)
 {
-  volatile unsigned int status = __builtin_arm_get_fpscr ();
+  unsigned status;
+
+  __builtin_arm_set_fpscr (0);
+  status = __builtin_arm_get_fpscr ();
   __builtin_arm_set_fpscr (status);
 }
 
 /* { dg-final { scan-assembler "mrc\tp10, 7, r\[0-9\]+, cr1, cr0, 0" } } */
-/* { dg-final { scan-assembler "mcr\tp10, 7, r\[0-9\]+, cr1, cr0, 0" } } */
+/* { dg-final { scan-assembler-times "mcr\tp10, 7, r\[0-9\]+, cr1, cr0, 0" 2 } } */

Re: Fix PR 83530

2018-04-06 Thread Alexander Monakov

On Tue, 3 Apr 2018, Andrey Belevantsev wrote:

> Hello,
> 
> This issue is when we cannot correctly make insn tick data for the
> unscheduled code (but processed by the modulo scheduler).  Fixed by closely
> following usual scheduling process as described in the PR.

This is ok with the following nit-picks fixed.

> 2018-04-03  Andrey Belevantsev  
> 
>   PR rtl-optimization/83530
> 
>   * sel-sched.c (force_next_insn): New global variable.
>   (remove_insn_for_debug): When force_next_insn is true, also leave only
> next insn
>   in the ready list.
>   (sel_sched_region): When the region wasn't scheduled, make another pass
> over it
>   with force_next_insn set to 1.

Overlong lines.

>   * gcc.dg/pr8350.c: New test.

Typo in test name.
 
> --- a/gcc/sel-sched.c
> +++ b/gcc/sel-sched.c
> @@ -5004,12 +5004,16 @@ remove_temp_moveop_nops (bool full_tidying)
> distinguishing between bookkeeping copies and original insns.  */
>  static int max_uid_before_move_op = 0;
> 
> +/* When true, we're always scheduling next insn on the already scheduled code
> +   to get the right insn data for the following bundling or other passes.  */
> +static int force_next_insn = 0;
> +
>  /* Remove from AV_VLIW_P all instructions but next when debug counter
> tells us so.  Next instruction is fetched from BNDS.  */
>  static void
>  remove_insns_for_debug (blist_t bnds, av_set_t *av_vliw_p)
>  {
> -  if (! dbg_cnt (sel_sched_insn_cnt))
> +  if (! dbg_cnt (sel_sched_insn_cnt) || force_next_insn)
>  /* Leave only the next insn in av_vliw.  */
>  {
>av_set_iterator av_it;
> @@ -7642,7 +7646,13 @@ sel_sched_region (int rgn)
>  sel_sched_region_1 ();
>else
>  /* Force initialization of INSN_SCHED_CYCLEs for correct bundling.  */

I believe this comment needs updating.

Please also consider moving both assignments of reset_sched_cycles_p to
after the if-else statement, just before sel_region_finish call.

> -reset_sched_cycles_p = true;
> +{
> +  reset_sched_cycles_p = false;
> +  pipelining_p = false;
> +  force_next_insn = 1;
> +  sel_sched_region_1 ();
> +  force_next_insn = 0;
> +}
> 
>sel_region_finish (reset_sched_cycles_p);
>  }

Re: Fix PR 83962

2018-04-06 Thread Alexander Monakov

On Tue, 3 Apr 2018, Andrey Belevantsev wrote:

> Hello,
> 
> This issues is about the correct order in which we need to call the
> routines that fix up the control flow for us.

OK with formatting both in the new comment and the Changelog fixed.

> Best,
> Andrey
> 
> 2018-04-03  Andrey Belevantsev  
> 
>   PR rtl-optimization/83962
> 
>   * sel-sched-ir.c (tidy_control_flow): Correct the order in which we call
> tidy_fallthru_edge
>   and tidy_control_flow.
> 
>   * gcc.dg/pr83962.c: New test.
> 
> diff --git a/gcc/sel-sched-ir.c b/gcc/sel-sched-ir.c
> index a965d2ec42f..f6de96a7f3d 100644
> --- a/gcc/sel-sched-ir.c
> +++ b/gcc/sel-sched-ir.c
> @@ -3839,9 +3839,13 @@ tidy_control_flow (basic_block xbb, bool full_tidying)
>&& INSN_SCHED_TIMES (BB_END (xbb)) == 0
>&& !IN_CURRENT_FENCE_P (BB_END (xbb)))
>  {
> -  if (sel_remove_insn (BB_END (xbb), false, false))
> -return true;
> +  /* We used to call sel_remove_insn here that can trigger
> tidy_control_flow
> + before we fix up the fallthru edge.  Correct that ordering by
> +explicitly doing the latter before the former.  */
> +  clear_expr (INSN_EXPR (BB_END (xbb)));
>tidy_fallthru_edge (EDGE_SUCC (xbb, 0));
> +  if (tidy_control_flow (xbb, false))
> +   return true;
>  }
> 
>first = sel_bb_head (xbb);
> diff --git a/gcc/testsuite/gcc.dg/pr83962.c b/gcc/testsuite/gcc.dg/pr83962.c
> new file mode 100644
> index 000..0547e218715
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr83962.c
> @@ -0,0 +1,10 @@
> +/* { dg-do compile { target powerpc*-*-* ia64-*-* i?86-*-* x86_64-*-* } } */
> +/* { dg-options "-std=gnu99 -O1 -fselective-scheduling2 -fschedule-insns2
> -fcse-follow-jumps -fno-ssa-phiopt -fno-guess-branch-probability" } */
> +unsigned int ca;
> +
> +void
> +v6 (long long unsigned int as, int p9)
> +{
> +  while (p9 < 1)
> +as = (as != ca) || (as > 1);
> +}
>

[PATCH] [ARC] Fix stack usage info for naked functions.

2018-04-06 Thread Alexey Brodkin

When code containing "naked" function gets compiled with '-fstack-usage'
compiler prints the following warning:
-->8-
board/synopsys/hsdk/hsdk.c: In function 'hsdk_core_init_f':
board/synopsys/hsdk/hsdk.c:345:1: warning: stack usage computation not 
supported for this target
 }
 ^
-->8-

Even though stack calculation makes no sense for "naked" function
we still need to correctly report stack size back to make compiler
happy.

This problem was caught in U-Boot here:
https://lists.denx.de/pipermail/u-boot/2018-March/324325.html

The same fix was done earlier for ARM, see:
"config/arm/arm.c (arm_expand_prologue): Set the stack usage to 0 "
https://github.com/gcc-mirror/gcc/commit/023a7c5dd37

gcc/
2018-04-06  Alexey Brodkin 

* config/arc/arc.c (arc_expand_prologue): Set stack usage info
also for naked functions.
---
 gcc/config/arc/arc.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 32fcb81880a2..3cb4ba5b4dd7 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -3149,7 +3149,11 @@ arc_expand_prologue (void)
 
   /* Naked functions don't have prologue.  */
   if (ARC_NAKED_P (fn_type))
-return;
+{
+  if (flag_stack_usage_info)
+   current_function_static_stack_size = 0;
+  return;
+}
 
   /* Compute total frame size.  */
   size = arc_compute_frame_size ();
-- 
2.14.3

Re: [PATCH, GCC/ARM] Fix PR85261: ICE with FPSCR setter builtin

2018-04-06 Thread Ramana Radhakrishnan

On 06/04/2018 16:54, Thomas Preudhomme wrote:
> Instruction pattern for setting the FPSCR expects the input value to be
> in a register. However, __builtin_arm_set_fpscr expander does not ensure
> that this is the case and as a result GCC ICEs when the builtin is
> called with a constant literal.
> 
> This commit fixes the builtin to force the input value into a register.
> It also remove the unneeded volatile in the existing fpscr test and
> fixes the function prototype.
> 
> ChangeLog entries are as follows:
> 
> *** gcc/ChangeLog ***
> 
> 2018-04-06  Thomas Preud'homme  
> 
>   PR target/85261
>   * config/arm/arm-builtins.c (arm_expand_builtin): Force input operand
>   into register.
> 
> *** gcc/testsuite/ChangeLog ***
> 
> 2018-04-06  Thomas Preud'homme  
> 
>   PR target/85261
>   * gcc.target/arm/fpscr.c: Add call to __builtin_arm_set_fpscr with
>   literal value.  Expect 2 MCR instruction.  Fix function prototype.
>   Remove volatile keyword.
> 
> Testing: Built an arm-none-eabi GCC cross-compiler and testsuite shows
> no regression.
> 
> Is this ok for stage4?
> 
> Best regards,
> 
> Thomas
> 

(sorry about the duplicate for those who get it)


LGTM, though in this case I would prefer a bootstrap and regression run
as this is automatically exercised most with gcc.dg/atomic_*.c and you
really need this tested on linux than just bare-metal as I'm not sure
how this gets tested on arm-none-eabi.

What about earlier branches, have you looked ? This is a silly target
bug and fixes should go back to older branches in this particular case
after baking this on trunk for some time.

regards
Ramana

Re: Fix PR 83913

2018-04-06 Thread Alexander Monakov

On Tue, 3 Apr 2018, Andrey Belevantsev wrote:

> Hello,
> 
> This issue ended up being fixed the way different from described in the PR.
>  We do not want to walk away from the invariant "zero SCHED_TIMES -- insn
> is not scheduled" even for bookkeeping copies (testing showed it trips over
> asserts designed to catch this).  Rather we choose merging exprs in the way
> the larger sched-times wins.

My understanding is this is not a "complete" solution to the problem, and a
chance for a similar blowup on some other testcase remains. Still, avoiding
picking the minimum sched-times value should be a good mitigation.

Please consider adding a comment that the average sched-times value is taken
as a compromise to thwart "endless" pipelining of bookkeeping-producing insns
available anywhere vs. pipelining of useful insns, or something like that?

OK with that considered/added.

> 
> Best,
> Andrey
> 
> 2018-04-03  Andrey Belevantsev  
> 
>   PR rtl-optimization/83913
> 
>   * sel-sched-ir.c (merge_expr_data): Choose the middle between two
> different sched-times
>   when merging exprs.
> 
>   * gcc.dg/pr83913.c: New test.
> 
> diff --git a/gcc/sel-sched-ir.c b/gcc/sel-sched-ir.c
> index a965d2ec42f..f6de96a7f3d 100644
> --- a/gcc/sel-sched-ir.c
> +++ b/gcc/sel-sched-ir.c
> @@ -1837,8 +1837,9 @@ merge_expr_data (expr_t to, expr_t from, insn_t
> split_point)
>if (EXPR_PRIORITY (to) < EXPR_PRIORITY (from))
>  EXPR_PRIORITY (to) = EXPR_PRIORITY (from);
> 
> -  if (EXPR_SCHED_TIMES (to) > EXPR_SCHED_TIMES (from))
> -EXPR_SCHED_TIMES (to) = EXPR_SCHED_TIMES (from);
> +  if (EXPR_SCHED_TIMES (to) != EXPR_SCHED_TIMES (from))
> +EXPR_SCHED_TIMES (to) = ((EXPR_SCHED_TIMES (from) + EXPR_SCHED_TIMES (to)
> + + 1) / 2);
> 
>if (EXPR_ORIG_BB_INDEX (to) != EXPR_ORIG_BB_INDEX (from))
>  EXPR_ORIG_BB_INDEX (to) = 0;
> @@ -3293,11 +3294,22 @@ has_dependence_note_mem_dep (rtx mem ATTRIBUTE_UNUSED,
> 
>  /* Note a dependence.  */
>  static void
> diff --git a/gcc/testsuite/gcc.dg/pr83913.c b/gcc/testsuite/gcc.dg/pr83913.c
> new file mode 100644
> index 000..c898d71a261
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr83913.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile { target powerpc*-*-* ia64-*-* i?86-*-* x86_64-*-* } } */
> +/* { dg-options "-O2 -funroll-all-loops -fselective-scheduling
> -fsel-sched-pipelining -fschedule-insns -fno-dce -fno-forward-propagate
> -fno-rerun-cse-after-loop -fno-web" } */
> +
> +int jo, z4;
> +
> +int
> +be (long unsigned int l7, int nt)
> +{
> +  int en;
> +
> +  jo = l7;
> +  for (en = 0; en < 24; ++en)
> +{
> +  jo = (jo / z4) * (!!jo >= ((!!nt) & 2));
> +  if (jo == 0)
> +nt = 0;
> +  else
> +{
> +  nt = z4;
> +  ++z4;
> +  nt = (long unsigned int) nt == (l7 + 1);
> +}
> +}
> +
> +  return nt;
> +}
> 
> 
>

Re: [PATCH, GCC/ARM] Fix PR85261: ICE with FPSCR setter builtin

2018-04-06 Thread Thomas Preudhomme




On 06/04/18 17:08, Ramana Radhakrishnan wrote:

On 06/04/2018 16:54, Thomas Preudhomme wrote:

Instruction pattern for setting the FPSCR expects the input value to be
in a register. However, __builtin_arm_set_fpscr expander does not ensure
that this is the case and as a result GCC ICEs when the builtin is
called with a constant literal.

This commit fixes the builtin to force the input value into a register.
It also remove the unneeded volatile in the existing fpscr test and
fixes the function prototype.

ChangeLog entries are as follows:

*** gcc/ChangeLog ***

2018-04-06  Thomas Preud'homme  

PR target/85261
* config/arm/arm-builtins.c (arm_expand_builtin): Force input operand
into register.

*** gcc/testsuite/ChangeLog ***

2018-04-06  Thomas Preud'homme  

PR target/85261
* gcc.target/arm/fpscr.c: Add call to __builtin_arm_set_fpscr with
literal value.  Expect 2 MCR instruction.  Fix function prototype.
Remove volatile keyword.

Testing: Built an arm-none-eabi GCC cross-compiler and testsuite shows
no regression.

Is this ok for stage4?

Best regards,

Thomas



(sorry about the duplicate for those who get it)


LGTM, though in this case I would prefer a bootstrap and regression run
as this is automatically exercised most with gcc.dg/atomic_*.c and you
really need this tested on linux than just bare-metal as I'm not sure
how this gets tested on arm-none-eabi.


Oh it is indeed. Didn't realized it was used anywhere. Will start bootstrap 
right away.




What about earlier branches, have you looked ? This is a silly target
bug and fixes should go back to older branches in this particular case
after baking this on trunk for some time.


GCC 6 and 7 are affected as well and a backport will be done once it has baked 
long enough of course.


Best regards,

Thomas

Re: Fix PRs 80463, 83972, 83480

2018-04-06 Thread Alexander Monakov

On Tue, 3 Apr 2018, Andrey Belevantsev wrote:

> Hello,
> 
> In these PRs we deal with the dependencies we are forced to make between a
> debug insn and its previous insn (unless bb head).  In the latter case, if
> such an insn has been moved, we fixup its movement so it aligns with the
> sel-sched invariants.  We also carefully adjust seqnos in the case we had a
> single debug insn left in the block.

This is OK with overlong lines fixed and the new comment reworded for clarity
(see below).

> Best,
> Andrey
> 
> 2018-04-03  Andrey Belevantsev  
> 
>   PR rtl-optimization/80463
>   PR rtl-optimization/83972
>   PR rtl-optimization/83480
> 
>   * sel-sched-ir.c (has_dependence_note_mem_dep): Take into account the
> correct producer for the insn.
>   (tidy_control_flow): Fixup seqnos in case of debug insns.
> 
>   * gcc.dg/pr80463.c: New test.
>   * g++.dg/pr80463.C: Likewise.
>   * gcc.dg/pr83972.c: Likewise.
> 
> diff --git a/gcc/sel-sched-ir.c b/gcc/sel-sched-ir.c
> index a965d2ec42f..f6de96a7f3d 100644
> --- a/gcc/sel-sched-ir.c
> +++ b/gcc/sel-sched-ir.c
> @@ -3293,11 +3293,22 @@ has_dependence_note_mem_dep (rtx mem ATTRIBUTE_UNUSED,
> 
>  /* Note a dependence.  */
>  static void
> -has_dependence_note_dep (insn_t pro ATTRIBUTE_UNUSED,
> +has_dependence_note_dep (insn_t pro,
>  ds_t ds ATTRIBUTE_UNUSED)
>  {
> -  if (!sched_insns_conditions_mutex_p (has_dependence_data.pro,
> -  VINSN_INSN_RTX
> (has_dependence_data.con)))
> +  insn_t real_pro = has_dependence_data.pro;
> +  insn_t real_con = VINSN_INSN_RTX (has_dependence_data.con);
> +
> +  /* We do not allow for debug insns to move through others unless they
> + are at the start of bb.  Such insns may create bookkeeping copies
> + that would not be able to move up breaking sel-sched invariants.

I have trouble parsing this, it seems a word is accidentally between "move up"
and "breaking". Also the "such" is a bit ambiguous.

> + Detect that here and allow that movement if we allowed it before
> + in the first place.  */
> +  if (DEBUG_INSN_P (real_con)
> +  && INSN_UID (NEXT_INSN (pro)) == INSN_UID (real_con))
> +return;

Should we be concerned about debug insns appearing in sequence here? E.g. if
pro and real_con are not consecutive, but all insns in between are debug insns?

> +
> +  if (!sched_insns_conditions_mutex_p (real_pro, real_con))
>  {
>ds_t *dsp = &has_dependence_data.has_dep_p[has_dependence_data.where];
> 
> @@ -3890,6 +3905,19 @@ tidy_control_flow (basic_block xbb, bool full_tidying)
> 
>gcc_assert (EDGE_SUCC (xbb->prev_bb, 0)->flags & EDGE_FALLTHRU);
> 
> +  /* We could have skipped some debug insns which did not get removed
> with the block,
> + and the seqnos could become incorrect.  Fix them up here.  */
> +  if (MAY_HAVE_DEBUG_INSNS && (sel_bb_head (xbb) != first ||
> sel_bb_end (xbb) != last))
> +   {
> + if (!sel_bb_empty_p (xbb->prev_bb))
> +   {
> + int prev_seqno = INSN_SEQNO (sel_bb_end (xbb->prev_bb));
> + if (prev_seqno > INSN_SEQNO (sel_bb_head (xbb)))
> +   for (insn_t insn = sel_bb_head (xbb); insn != first; insn =
> NEXT_INSN (insn))
> + INSN_SEQNO (insn) = prev_seqno + 1;
> +   }
> +   }
> +
>/* It can turn out that after removing unused jump, basic block
>   that contained that jump, becomes empty too.  In such case
>   remove it too.  */
> diff --git a/gcc/testsuite/g++.dg/pr80463.C b/gcc/testsuite/g++.dg/pr80463.C
> new file mode 100644
> index 000..5614c28ca45
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/pr80463.C
> @@ -0,0 +1,20 @@
> +/* { dg-do compile { target powerpc*-*-* ia64-*-* i?86-*-* x86_64-*-* } } */
> +/* { dg-options "-g -fselective-scheduling2 -O2
> -fvar-tracking-assignments" } */
> +
> +int *a;
> +int b, c;
> +void
> +d ()
> +{
> +  for (int e; c; e++)
> +switch (e)
> +  {
> +  case 0:
> +   a[e] = 1;
> +  case 1:
> +   b = 2;
> +   break;
> +  default:
> +   a[e] = 3;
> +  }
> +}
> diff --git a/gcc/testsuite/gcc.dg/pr80463.c b/gcc/testsuite/gcc.dg/pr80463.c
> new file mode 100644
> index 000..cebf2fef1f3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr80463.c
> @@ -0,0 +1,54 @@
> +/* { dg-do compile { target powerpc*-*-* ia64-*-* i?86-*-* x86_64-*-* } } */
> +/* { dg-options "-g -O2 -fvar-tracking-assignments -fselective-scheduling2
> -ftree-loop-vectorize -fnon-call-exceptions -fno-tree-vrp -fno-gcse-lm
> -fno-tree-loop-im -fno-reorder-blocks-and-partition -fno-reorder-blocks
> -fno-move-loop-invariants -w" } */
> +
> +short int t2;
> +int cd, aa, ft;
> +
> +void
> +dh (void)
> +{
> +  int qs = 0;
> +
> +  if (t2 < 1)
> +{
> +  int bq = 0;
> +
> +  while (bq < 1)
> +{
> +}
> +
> +  while (t2 < 1)
> +{
> +  if (t2 == 0)
> +{
>

[C++ PATCH] Fix structured binding tsubst error recovery (PR c++/85210)

2018-04-06 Thread Jakub Jelinek

Hi!

During parsing we report error here, the decomp id shadowing parameter,
but we still have a VAR_DECL for the decomp id, only during
tsubst_decomp_names tsubst returns a PARM_DECL for it.

I believe and (the code asserts that) this can only happen during error
recovery and we just should punt on it, we don't want to pushdecl it, nor
try to create lds_decomp DECL_LANG_SPECIFIC for it (PARM_DECLs have
lds_parm, so that is where we ICE on this testcase), etc.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2018-04-06  Jakub Jelinek  

PR c++/85210
* pt.c (tsubst_decomp_names): Return error_mark_node and assert
errorcount is set if tsubst doesn't return a VAR_DECL.

* g++.dg/cpp1z/decomp42.C: New test.

--- gcc/cp/pt.c.jj  2018-04-05 23:30:11.315435539 +0200
+++ gcc/cp/pt.c 2018-04-06 11:46:34.170154030 +0200
@@ -16235,6 +16235,12 @@ tsubst_decomp_names (tree decl, tree pat
   DECL_HAS_VALUE_EXPR_P (decl2) = 1;
   if (VAR_P (decl3))
DECL_TEMPLATE_INSTANTIATED (decl3) = 1;
+  else
+   {
+ gcc_assert (errorcount);
+ decl = error_mark_node;
+ continue;
+   }
   maybe_push_decl (decl3);
   if (error_operand_p (decl3))
decl = error_mark_node;
--- gcc/testsuite/g++.dg/cpp1z/decomp42.C.jj2018-04-06 11:45:39.724162398 
+0200
+++ gcc/testsuite/g++.dg/cpp1z/decomp42.C   2018-04-06 11:45:39.724162398 
+0200
@@ -0,0 +1,18 @@
+// PR c++/85210
+// { dg-do compile { target c++11 } }
+// { dg-options "" }
+
+struct A { int i; };
+
+template 
+void
+foo (int j)
+{
+  auto [j] = A{j}; // { dg-error "shadows a parameter" }
+}  // { dg-warning "structured bindings only available 
with" "" { target c++14_down } .-1 }
+
+void
+bar ()
+{
+  foo<0> (0);
+}

Jakub

[PATCH] Fix create_preheader ICE (PR rtl-optimization/84872)

2018-04-06 Thread Jakub Jelinek

Hi!

When create_preheader is called with CP_FALLTHRU_PREHEADERS and
the loop header is cold, but the bb dominating the loop is hot (or vice
versa) and there are just 2 incoming edges into the header, we use
split_edge, which unfortunately make the new bb use the partition of the
source rather than destination, so this newly created preheader is never
fallthru without jump, because the edge is EDGE_CROSSING.

Fixed by forcing to use make_forwarder_block in that case, that uses the
partition of the bb on which it is called (i.e. loop header).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2018-04-06  Jakub Jelinek  

PR rtl-optimization/84872
* cfgloopmanip.c (create_preheader): Use make_forwarder_block even if
nentry == 1 when CP_FALLTHRU_PREHEADERS and single_entry is
EDGE_CROSSING edge.

* gcc.dg/graphite/pr84872.c: New test.

--- gcc/cfgloopmanip.c.jj   2018-01-03 10:19:56.065534103 +0100
+++ gcc/cfgloopmanip.c  2018-04-06 12:57:53.141922946 +0200
@@ -1494,7 +1494,9 @@ create_preheader (struct loop *loop, int
 
   mfb_kj_edge = loop_latch_edge (loop);
   latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0;
-  if (nentry == 1)
+  if (nentry == 1
+  && ((flags & CP_FALLTHRU_PREHEADERS) == 0
+ || (single_entry->flags & EDGE_CROSSING) == 0))
 dummy = split_edge (single_entry);
   else
 {
--- gcc/testsuite/gcc.dg/graphite/pr84872.c.jj  2018-04-06 13:06:07.017014715 
+0200
+++ gcc/testsuite/gcc.dg/graphite/pr84872.c 2018-04-06 13:05:46.358011993 
+0200
@@ -0,0 +1,19 @@
+/* PR rtl-optimization/84872 */
+/* { dg-do compile { target pthread } } */
+/* { dg-options "-O1 -floop-parallelize-all -freorder-blocks-and-partition 
-fschedule-insns2 -fselective-scheduling2 -fsel-sched-pipelining -fno-tree-dce" 
} */
+
+void
+foo (int x)
+{
+  int a[2];
+  int b, c = 0;
+
+  for (b = 0; b < 2; ++b)
+a[b] = 0;
+  for (b = 0; b < 2; ++b)
+a[b] = 0;
+
+  while (c < 1)
+while (x < 1)
+  ++x;
+}

Jakub

[patch, libfortran] Fix PR 88235, buffer overrun in matmul

2018-04-06 Thread Thomas König


Hello world,

the attached patch fixes a buffer overrun in matmul, an 8 regression.
No test case since this was only detectable with the address sanitizer
or with valgrind.

Regression-tested on trunk. OK?

Regards

Thomas

2018-04-06  Thomas Koenig  

PR libfortran/85253
* m4/matmul_internal.m4: If ycount == 1, add one more row to
the internal buffer.
* generated/matmul_c10.c: Regenerated.
* generated/matmul_c16.c: Regenerated.
* generated/matmul_c4.c: Regenerated.
* generated/matmul_c8.c: Regenerated.
* generated/matmul_i1.c: Regenerated.
* generated/matmul_i16.c: Regenerated.
* generated/matmul_i2.c: Regenerated.
* generated/matmul_i4.c: Regenerated.
* generated/matmul_i8.c: Regenerated.
* generated/matmul_r10.c: Regenerated.
* generated/matmul_r16.c: Regenerated.
* generated/matmul_r4.c: Regenerated.
* generated/matmul_r8.c: Regenerated.
* generated/matmulavx128_c10.c: Regenerated.
* generated/matmulavx128_c16.c: Regenerated.
* generated/matmulavx128_c4.c: Regenerated.
* generated/matmulavx128_c8.c: Regenerated.
* generated/matmulavx128_i1.c: Regenerated.
* generated/matmulavx128_i16.c: Regenerated.
* generated/matmulavx128_i2.c: Regenerated.
* generated/matmulavx128_i4.c: Regenerated.
* generated/matmulavx128_i8.c: Regenerated.
* generated/matmulavx128_r10.c: Regenerated.
* generated/matmulavx128_r16.c: Regenerated.
* generated/matmulavx128_r4.c: Regenerated.
* generated/matmulavx128_r8.c: Regenerated.
Index: m4/matmul_internal.m4
===
--- m4/matmul_internal.m4	(Revision 259152)
+++ m4/matmul_internal.m4	(Arbeitskopie)
@@ -234,7 +234,7 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl
 
   /* Adjust size of t1 to what is needed.  */
   index_type t1_dim;
-  t1_dim = (a_dim1-1) * 256 + b_dim1;
+  t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
   if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmul_c10.c
===
--- generated/matmul_c10.c	(Revision 259152)
+++ generated/matmul_c10.c	(Arbeitskopie)
@@ -318,7 +318,7 @@ matmul_c10_avx (gfc_array_c10 * const restrict ret
 
   /* Adjust size of t1 to what is needed.  */
   index_type t1_dim;
-  t1_dim = (a_dim1-1) * 256 + b_dim1;
+  t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
   if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -870,7 +870,7 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict re
 
   /* Adjust size of t1 to what is needed.  */
   index_type t1_dim;
-  t1_dim = (a_dim1-1) * 256 + b_dim1;
+  t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
   if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1422,7 +1422,7 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict
 
   /* Adjust size of t1 to what is needed.  */
   index_type t1_dim;
-  t1_dim = (a_dim1-1) * 256 + b_dim1;
+  t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
   if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1988,7 +1988,7 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict
 
   /* Adjust size of t1 to what is needed.  */
   index_type t1_dim;
-  t1_dim = (a_dim1-1) * 256 + b_dim1;
+  t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
   if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -2614,7 +2614,7 @@ matmul_c10 (gfc_array_c10 * const restrict retarra
 
   /* Adjust size of t1 to what is needed.  */
   index_type t1_dim;
-  t1_dim = (a_dim1-1) * 256 + b_dim1;
+  t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
   if (t1_dim > 65536)
 	t1_dim = 65536;
 
Index: generated/matmul_c16.c
===
--- generated/matmul_c16.c	(Revision 259152)
+++ generated/matmul_c16.c	(Arbeitskopie)
@@ -318,7 +318,7 @@ matmul_c16_avx (gfc_array_c16 * const restrict ret
 
   /* Adjust size of t1 to what is needed.  */
   index_type t1_dim;
-  t1_dim = (a_dim1-1) * 256 + b_dim1;
+  t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
   if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -870,7 +870,7 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict re
 
   /* Adjust size of t1 to what is needed.  */
   index_type t1_dim;
-  t1_dim = (a_dim1-1) * 256 + b_dim1;
+  t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
   if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1422,7 +1422,7 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict
 
   /* Adjust size of t1 to what is needed.  */
   index_type t1_dim;
-  t1_dim = (a_dim1-1) * 256 + b_dim1;
+  t1_dim = (a_dim1 - (ycount > 1)) * 256 + b_dim1;
   if (t1_dim > 65536)
 	t1_dim = 65536;
 
@@ -1988,7 +1988,7 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict
 
   /* Adjust size of t1 to what i

Re: [C++ Patch] PR 85227 ("[7/8/ Regression] ICE with structured binding of a forward declared variable")

2018-04-06 Thread Jason Merrill

On Fri, Apr 6, 2018 at 5:01 AM, Paolo Carlini  wrote:
> here, for an incomplete type we ICE pretty soon in find_decomp_class_base.
> Comparing to other cases too, I convinced myself that trying to complete the
> type is Ok. Also, it seems that in these functions we want to talk about
> structured binding and use an appropriate location, thus no
> complete_type_or_maybe_complain. Tested x86_64-linux.

What if, in a template, we defer trying to do bindings to an incomplete type, so

extern struct A a;

template
void f()
{
  auto [x] = a;
}

struct A { int i; };

int main()
{
  f<0>();
}

works?  Probably with a pedwarn, as in xref_basetypes or
cp_parser_dot_deref_incomplete.

Jason

Re: [C++ PATCH] Fix structured binding tsubst error recovery (PR c++/85210)

2018-04-06 Thread Jason Merrill

OK.

On Fri, Apr 6, 2018 at 11:44 AM, Jakub Jelinek  wrote:
> Hi!
>
> During parsing we report error here, the decomp id shadowing parameter,
> but we still have a VAR_DECL for the decomp id, only during
> tsubst_decomp_names tsubst returns a PARM_DECL for it.
>
> I believe and (the code asserts that) this can only happen during error
> recovery and we just should punt on it, we don't want to pushdecl it, nor
> try to create lds_decomp DECL_LANG_SPECIFIC for it (PARM_DECLs have
> lds_parm, so that is where we ICE on this testcase), etc.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2018-04-06  Jakub Jelinek  
>
> PR c++/85210
> * pt.c (tsubst_decomp_names): Return error_mark_node and assert
> errorcount is set if tsubst doesn't return a VAR_DECL.
>
> * g++.dg/cpp1z/decomp42.C: New test.
>
> --- gcc/cp/pt.c.jj  2018-04-05 23:30:11.315435539 +0200
> +++ gcc/cp/pt.c 2018-04-06 11:46:34.170154030 +0200
> @@ -16235,6 +16235,12 @@ tsubst_decomp_names (tree decl, tree pat
>DECL_HAS_VALUE_EXPR_P (decl2) = 1;
>if (VAR_P (decl3))
> DECL_TEMPLATE_INSTANTIATED (decl3) = 1;
> +  else
> +   {
> + gcc_assert (errorcount);
> + decl = error_mark_node;
> + continue;
> +   }
>maybe_push_decl (decl3);
>if (error_operand_p (decl3))
> decl = error_mark_node;
> --- gcc/testsuite/g++.dg/cpp1z/decomp42.C.jj2018-04-06 11:45:39.724162398 
> +0200
> +++ gcc/testsuite/g++.dg/cpp1z/decomp42.C   2018-04-06 11:45:39.724162398 
> +0200
> @@ -0,0 +1,18 @@
> +// PR c++/85210
> +// { dg-do compile { target c++11 } }
> +// { dg-options "" }
> +
> +struct A { int i; };
> +
> +template 
> +void
> +foo (int j)
> +{
> +  auto [j] = A{j}; // { dg-error "shadows a parameter" }
> +}  // { dg-warning "structured bindings only available 
> with" "" { target c++14_down } .-1 }
> +
> +void
> +bar ()
> +{
> +  foo<0> (0);
> +}
>
> Jakub

Re: [PATCH] Fix dwarf2out ICE on zero sized array initializer (PR debug/85252)

2018-04-06 Thread Richard Biener

On April 6, 2018 6:10:14 PM GMT+02:00, Jakub Jelinek  wrote:
>Hi!
>
>As mentioned in the PR, we ICE on the following zero sized array
>initializers, while domain is non-NULL and TYPE_MIN_VALUE is size_int
>(0),
>TYPE_MAX_VALUE is NULL and not INTEGER_CST compare_tree_int assumes it
>is.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK. 

Richard. 

>2018-04-06  Jakub Jelinek  
>
>   PR debug/85252
>   * dwarf2out.c (rtl_for_decl_init): For STRING_CST initializer only
>   build CONST_STRING if TYPE_MAX_VALUE is non-NULL and is INTEGER_CST.
>
>   * gcc.dg/debug/pr85252.c: New test.
>
>--- gcc/dwarf2out.c.jj 2018-04-04 16:13:43.141551164 +0200
>+++ gcc/dwarf2out.c2018-04-06 14:05:42.278496761 +0200
>@@ -19596,6 +19596,8 @@ rtl_for_decl_init (tree init, tree type)
>   if (is_int_mode (TYPE_MODE (enttype), &mode)
> && GET_MODE_SIZE (mode) == 1
> && domain
>+&& TYPE_MAX_VALUE (domain)
>+&& TREE_CODE (TYPE_MAX_VALUE (domain)) == INTEGER_CST
> && integer_zerop (TYPE_MIN_VALUE (domain))
> && compare_tree_int (TYPE_MAX_VALUE (domain),
>  TREE_STRING_LENGTH (init) - 1) == 0
>--- gcc/testsuite/gcc.dg/debug/pr85252.c.jj2018-04-06
>14:08:02.507520855 +0200
>+++ gcc/testsuite/gcc.dg/debug/pr85252.c   2018-04-06 14:07:29.200514380
>+0200
>@@ -0,0 +1,11 @@
>+/* PR debug/85252 */
>+/* { dg-do compile } */
>+
>+void
>+foo (void)
>+{
>+  static char a[0] = "";
>+  static char b[0] = "b"; /* { dg-warning "initializer-string for
>array of chars is too long" } */
>+  static char c[1] = "c";
>+  static char d[1] = "de";/* { dg-warning "initializer-string for
>array of chars is too long" } */
>+}
>
>   Jakub

Re: [PATCH] Fix create_preheader ICE (PR rtl-optimization/84872)

2018-04-06 Thread Richard Biener

On April 6, 2018 6:04:48 PM GMT+02:00, Jakub Jelinek  wrote:
>Hi!
>
>When create_preheader is called with CP_FALLTHRU_PREHEADERS and
>the loop header is cold, but the bb dominating the loop is hot (or vice
>versa) and there are just 2 incoming edges into the header, we use
>split_edge, which unfortunately make the new bb use the partition of
>the
>source rather than destination, so this newly created preheader is
>never
>fallthru without jump, because the edge is EDGE_CROSSING.
>
>Fixed by forcing to use make_forwarder_block in that case, that uses
>the
>partition of the bb on which it is called (i.e. loop header).
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK. 

Richard. 
>2018-04-06  Jakub Jelinek  
>
>   PR rtl-optimization/84872
>   * cfgloopmanip.c (create_preheader): Use make_forwarder_block even if
>   nentry == 1 when CP_FALLTHRU_PREHEADERS and single_entry is
>   EDGE_CROSSING edge.
>
>   * gcc.dg/graphite/pr84872.c: New test.
>
>--- gcc/cfgloopmanip.c.jj  2018-01-03 10:19:56.065534103 +0100
>+++ gcc/cfgloopmanip.c 2018-04-06 12:57:53.141922946 +0200
>@@ -1494,7 +1494,9 @@ create_preheader (struct loop *loop, int
> 
>   mfb_kj_edge = loop_latch_edge (loop);
>   latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0;
>-  if (nentry == 1)
>+  if (nentry == 1
>+  && ((flags & CP_FALLTHRU_PREHEADERS) == 0
>+|| (single_entry->flags & EDGE_CROSSING) == 0))
> dummy = split_edge (single_entry);
>   else
> {
>--- gcc/testsuite/gcc.dg/graphite/pr84872.c.jj 2018-04-06
>13:06:07.017014715 +0200
>+++ gcc/testsuite/gcc.dg/graphite/pr84872.c2018-04-06
>13:05:46.358011993 +0200
>@@ -0,0 +1,19 @@
>+/* PR rtl-optimization/84872 */
>+/* { dg-do compile { target pthread } } */
>+/* { dg-options "-O1 -floop-parallelize-all
>-freorder-blocks-and-partition -fschedule-insns2
>-fselective-scheduling2 -fsel-sched-pipelining -fno-tree-dce" } */
>+
>+void
>+foo (int x)
>+{
>+  int a[2];
>+  int b, c = 0;
>+
>+  for (b = 0; b < 2; ++b)
>+a[b] = 0;
>+  for (b = 0; b < 2; ++b)
>+a[b] = 0;
>+
>+  while (c < 1)
>+while (x < 1)
>+  ++x;
>+}
>
>   Jakub

[PATCH] Fix dwarf2out ICE on zero sized array initializer (PR debug/85252)

2018-04-06 Thread Jakub Jelinek

Hi!

As mentioned in the PR, we ICE on the following zero sized array
initializers, while domain is non-NULL and TYPE_MIN_VALUE is size_int (0),
TYPE_MAX_VALUE is NULL and not INTEGER_CST compare_tree_int assumes it is.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2018-04-06  Jakub Jelinek  

PR debug/85252
* dwarf2out.c (rtl_for_decl_init): For STRING_CST initializer only
build CONST_STRING if TYPE_MAX_VALUE is non-NULL and is INTEGER_CST.

* gcc.dg/debug/pr85252.c: New test.

--- gcc/dwarf2out.c.jj  2018-04-04 16:13:43.141551164 +0200
+++ gcc/dwarf2out.c 2018-04-06 14:05:42.278496761 +0200
@@ -19596,6 +19596,8 @@ rtl_for_decl_init (tree init, tree type)
   if (is_int_mode (TYPE_MODE (enttype), &mode)
  && GET_MODE_SIZE (mode) == 1
  && domain
+ && TYPE_MAX_VALUE (domain)
+ && TREE_CODE (TYPE_MAX_VALUE (domain)) == INTEGER_CST
  && integer_zerop (TYPE_MIN_VALUE (domain))
  && compare_tree_int (TYPE_MAX_VALUE (domain),
   TREE_STRING_LENGTH (init) - 1) == 0
--- gcc/testsuite/gcc.dg/debug/pr85252.c.jj 2018-04-06 14:08:02.507520855 
+0200
+++ gcc/testsuite/gcc.dg/debug/pr85252.c2018-04-06 14:07:29.200514380 
+0200
@@ -0,0 +1,11 @@
+/* PR debug/85252 */
+/* { dg-do compile } */
+
+void
+foo (void)
+{
+  static char a[0] = "";
+  static char b[0] = "b";  /* { dg-warning "initializer-string for array 
of chars is too long" } */
+  static char c[1] = "c";
+  static char d[1] = "de"; /* { dg-warning "initializer-string for array 
of chars is too long" } */
+}

Jakub

PR c++/85214 - ICE with alias, generic lambda, constexpr if.

2018-04-06 Thread Jason Merrill

Here, since the condition for the constexpr if depends on the type of
'j', it's still dependent when we are partially instantiating the
inner lambda, so we need to defer instantiating the constexpr if.
When we instantiated the inner lambda, we tried to substitute into the
typename, which failed because we didn't have a declaration of 'i'
available.

Fixed by teaching extract_locals_r to capture local typedefs such as
'ar'; if we have the typedef handy, we don't need to substitute into
its  definition.

Tested x86_64-pc-linux-gnu, applying to trunk.
commit f55b3ab9ac0cd82137c4bdeca7304f8097da5c3d
Author: Jason Merrill 
Date:   Fri Apr 6 13:46:50 2018 -0400

PR c++/85214 - ICE with alias, generic lambda, constexpr if.

Here, since the condition for the constexpr if depends on the type of 'j',
it's still dependent when we are partially instantiating the inner lambda,
so we need to defer instantiating the constexpr if.  When we instantiated
the inner lambda, we tried to substitute into the typename, which failed
because we didn't have a declaration of 'i' available.

Fixed by teaching extract_locals_r to capture local typedefs such as 'ar';
if we have the typedef handy, we don't need to substitute into its
definition.

* pt.c (extract_locals_r): Remember local typedefs.

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 3bac7563992..6cd48c6688a 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -11610,6 +11610,11 @@ extract_locals_r (tree *tp, int */*walk_subtrees*/, void *data_)
   el_data &data = *reinterpret_cast(data_);
   tree *extra = &data.extra;
   tsubst_flags_t complain = data.complain;
+
+  if (TYPE_P (*tp) && typedef_variant_p (*tp))
+/* Remember local typedefs (85214).  */
+tp = &TYPE_NAME (*tp);
+
   if (TREE_CODE (*tp) == DECL_EXPR)
 data.internal.add (DECL_EXPR_DECL (*tp));
   else if (tree spec = retrieve_local_specialization (*tp))
diff --git a/gcc/testsuite/g++.dg/cpp1z/constexpr-if20.C b/gcc/testsuite/g++.dg/cpp1z/constexpr-if20.C
new file mode 100644
index 000..24343adb748
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/constexpr-if20.C
@@ -0,0 +1,17 @@
+// PR c++/85214
+// { dg-additional-options -std=c++17 }
+
+struct g {
+  constexpr operator int() { return true; }
+};
+template  constexpr bool m = true;
+template  struct C { typedef double q; };
+void ao() {
+  [](auto i) {
+  using ar = typename C::q;
+  [](auto j) {
+	using as = typename C::q;
+	if constexpr (m) {}
+  }(g());
+  }(g());
+}

Re: C++ PATCH for c++/85032, rejects-valid with if constexpr in template

2018-04-06 Thread Marek Polacek

On Mon, Mar 26, 2018 at 01:17:22PM -0400, Jason Merrill wrote:
> On Sat, Mar 24, 2018 at 6:59 AM, Marek Polacek  wrote:
> > Recently the code in finish_static_assert was changed to use
> > perform_implicit_conversion_flags followed by fold_non_dependent_expr.  That
> > broke this test becase when in a template, p_i_c_f merely wraps the expr in
> > an IMPLICIT_CONV_EXPR.  fold_non_dependent_expr should be able to fold it to
> > a constant but it gave up because is_nondependent_constant_expression 
> > returned
> > false.  Jason suggested to fix this roughly like the following, i.e. 
> > consider
> > conversions from classes to literal types potentially constant.
> >
> > Bootstrapped/regtested on x86_64-linux, ok for trunk?
> >
> > 2018-03-24  Marek Polacek  
> >
> > PR c++/85032
> > * constexpr.c (potential_constant_expression_1): Consider 
> > conversions
> > from classes to literal types potentially constant.
> >
> > * g++.dg/cpp0x/pr51225.C: Adjust error message.
> > * g++.dg/cpp1z/constexpr-if17.C: New test.
> >
> > diff --git gcc/cp/constexpr.c gcc/cp/constexpr.c
> > index bebd9f5b5d0..c4b5afe90a2 100644
> > --- gcc/cp/constexpr.c
> > +++ gcc/cp/constexpr.c
> > @@ -5768,6 +5768,23 @@ potential_constant_expression_1 (tree t, bool 
> > want_rval, bool strict, bool now,
> >   TREE_TYPE (t));
> >   return false;
> > }
> > +  /* This might be a conversion from a class to a literal type.  Let's
> > +consider it potentially constant since the conversion might be
> > +a constexpr user-defined conversion.  */
> > +  else if (cxx_dialect >= cxx11
> > +  && COMPLETE_TYPE_P (TREE_TYPE (t))
> > +  && literal_type_p (TREE_TYPE (t))
> 
> We probably need to allow dependent types here, too.  And incomplete
> classes, which might turn out to be literal later.

Ok, I've allowed incomplete types, too.  And I think the patch also allows
dependent types.  Or did you mean using
   && (TREE_TYPE (t) == NULL_TREE
   || !COMPLETE_TYPE_P (TREE_TYPE (t))
   || literal_type_p (TREE_TYPE (t)))
?  That doesn't seem to be needed.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2018-04-06  Marek Polacek  

PR c++/85032
* constexpr.c (potential_constant_expression_1): Consider conversions
from classes to literal types potentially constant.

* g++.dg/cpp0x/pr51225.C: Adjust error message.
* g++.dg/cpp1z/constexpr-if20.C: New test.

diff --git gcc/cp/constexpr.c gcc/cp/constexpr.c
index 3cc196b4d17..916060312f0 100644
--- gcc/cp/constexpr.c
+++ gcc/cp/constexpr.c
@@ -5777,6 +5777,23 @@ potential_constant_expression_1 (tree t, bool want_rval, 
bool strict, bool now,
  TREE_TYPE (t));
  return false;
}
+  /* This might be a conversion from a class to a (potentially) literal
+type.  Let's consider it potentially constant since the conversion
+might be a constexpr user-defined conversion.  */
+  else if (cxx_dialect >= cxx11
+  && (!COMPLETE_TYPE_P (TREE_TYPE (t))
+  || literal_type_p (TREE_TYPE (t)))
+  && TREE_OPERAND (t, 0))
+   {
+ tree type = TREE_TYPE (TREE_OPERAND (t, 0));
+ /* If this is a dependent type, it could end up being a class
+with conversions.  */
+ if (type == NULL_TREE || WILDCARD_TYPE_P (type))
+   return true;
+ /* Or a non-dependent class which has conversions.  */
+ else if (CLASS_TYPE_P (type) && TYPE_HAS_CONVERSION (type))
+   return true;
+   }
 
   return (RECUR (TREE_OPERAND (t, 0),
 TREE_CODE (TREE_TYPE (t)) != REFERENCE_TYPE));
diff --git gcc/testsuite/g++.dg/cpp0x/pr51225.C 
gcc/testsuite/g++.dg/cpp0x/pr51225.C
index f80bd0e778e..5b4e432f7ed 100644
--- gcc/testsuite/g++.dg/cpp0x/pr51225.C
+++ gcc/testsuite/g++.dg/cpp0x/pr51225.C
@@ -5,7 +5,7 @@ template struct A {};
 
 template void foo()
 {
-  A a; // { dg-error "not declared|invalid type" }
+  A a; // { dg-error "not declared|could not convert" }
 }
 
 template struct bar
diff --git gcc/testsuite/g++.dg/cpp1z/constexpr-if20.C 
gcc/testsuite/g++.dg/cpp1z/constexpr-if20.C
index e69de29bb2d..56e108be4ad 100644
--- gcc/testsuite/g++.dg/cpp1z/constexpr-if20.C
+++ gcc/testsuite/g++.dg/cpp1z/constexpr-if20.C
@@ -0,0 +1,22 @@
+// PR c++/85032
+// { dg-options -std=c++17 }
+
+struct A
+{
+  constexpr operator bool () { return true; }
+  int i;
+};
+
+A a;
+
+template 
+void f()
+{
+  constexpr bool b = a;
+  static_assert (a);
+}
+
+int main()
+{
+  f();
+}

Marek

Re: [patch, libfortran] Fix PR 88235, buffer overrun in matmul

2018-04-06 Thread Steve Kargl

On Fri, Apr 06, 2018 at 06:52:36PM +0200, Thomas König wrote:
> 
> the attached patch fixes a buffer overrun in matmul, an 8 regression.
> No test case since this was only detectable with the address sanitizer
> or with valgrind.
> 
> Regression-tested on trunk. OK?
> 

Yes.

-- 
Steve

Re: [C++ Patch] PR 85227 ("[7/8/ Regression] ICE with structured binding of a forward declared variable")

2018-04-06 Thread Paolo Carlini


Hi,

On 06/04/2018 19:04, Jason Merrill wrote:

On Fri, Apr 6, 2018 at 5:01 AM, Paolo Carlini  wrote:

here, for an incomplete type we ICE pretty soon in find_decomp_class_base.
Comparing to other cases too, I convinced myself that trying to complete the
type is Ok. Also, it seems that in these functions we want to talk about
structured binding and use an appropriate location, thus no
complete_type_or_maybe_complain. Tested x86_64-linux.

What if, in a template, we defer trying to do bindings to an incomplete type, so

extern struct A a;

template
void f()
{
   auto [x] = a;
}

struct A { int i; };

int main()
{
   f<0>();
}

works?  Probably with a pedwarn, as in xref_basetypes or
cp_parser_dot_deref_incomplete.
Ok... I tested the very simple patch below, wasnt sure between pedwarn 
(loc, 0, ...) and pedwarn (loc, OPT_Wpedantic, ...) but probably we want 
to former in order not to be too permissive (for comparison, clang 
rejects with an hard error both tests). What do you think?


Thanks!
Paolo.

/
Index: cp/decl.c
===
--- cp/decl.c   (revision 259184)
+++ cp/decl.c   (working copy)
@@ -7756,6 +7756,9 @@ cp_finish_decomp (tree decl, tree first, unsigned
   error_at (loc, "cannot decompose lambda closure type %qT", type);
   goto error_out;
 }
+  else if (processing_template_decl && !COMPLETE_TYPE_P (type))
+pedwarn (loc, 0, "structured binding refers to incomplete class type %qT",
+type);
   else
 {
   tree btype = find_decomp_class_base (loc, type, NULL_TREE);
Index: testsuite/g++.dg/cpp1z/decomp43.C
===
--- testsuite/g++.dg/cpp1z/decomp43.C   (nonexistent)
+++ testsuite/g++.dg/cpp1z/decomp43.C   (working copy)
@@ -0,0 +1,10 @@
+// PR c++/85227
+// { dg-do compile { target c++11 } }
+// { dg-options "" }
+
+extern struct A a;
+
+template void foo()
+{
+  auto[i] = a;  // { dg-warning "incomplete" }
+}  // { dg-warning "structured bindings only available with -std=c..17 or 
-std=gnu..17" "" { target c++14_down } .-1 }
Index: testsuite/g++.dg/cpp1z/decomp44.C
===
--- testsuite/g++.dg/cpp1z/decomp44.C   (nonexistent)
+++ testsuite/g++.dg/cpp1z/decomp44.C   (working copy)
@@ -0,0 +1,18 @@
+// PR c++/85227
+// { dg-do compile { target c++11 } }
+// { dg-options "" }
+
+extern struct A a;
+
+template
+void f()
+{
+  auto [x] = a;  // { dg-warning "incomplete" }
+}  // { dg-warning "structured bindings only available with -std=c..17 or 
-std=gnu..17" "" { target c++14_down } .-1 }
+
+struct A { int i; };
+
+int main()
+{
+  f<0>();
+}

[PATCH] v2: Show pertinent parameter (PR c++/85110)

2018-04-06 Thread David Malcolm

On Thu, 2018-03-29 at 09:20 -0400, Jason Merrill wrote:
> On Wed, Mar 28, 2018 at 4:44 PM, David Malcolm 
> wrote:
> > This followup patch updates the specific error-handling path
> > to add a note showing the pertinent parameter decl, taking
> > the output from:
> > 
> > test.cc: In function 'void caller(const char*)':
> > test.cc:6:14: error: cannot convert 'const char*' to 'const char**'
> > for argument '2' to 'void callee(int, const char**, int)'
> >callee (1, fmt, 3);
> >   ^~~
> > 
> > to:
> > 
> > test.cc: In function 'void caller(const char*)':
> > test.cc:6:14: error: cannot convert 'const char*' to 'const char**'
> > for argument '2' to 'void callee(int, const char**, int)'
> >callee (1, fmt, 3);
> >   ^~~
> > test.cc:1:36: note:   initializing argument 2 of 'void callee(int,
> > const char**, int)'
> >  void callee (int one, const char **two, int three);
> >~^~~
> > 
> > Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu; adds
> > a further 18 PASS results to g++.sum.
> > 
> > Again, not a regression as such, but I've been calling out the
> > underlined
> > arguments as a feature of gcc 8, so would be good to fix.
> > 
> > OK for trunk?
> > 
> > gcc/cp/ChangeLog:
> > PR c++/85110
> > * call.c (get_fndecl_argument_location): Make non-static.
> > * cp-tree.h (get_fndecl_argument_location): New decl.
> > * typeck.c (convert_for_assignment): When complaining due
> > to
> > conversions for an argument, show the location of the
> > parameter
> > within the decl.
> > 
> > gcc/testsuite/ChangeLog:
> > PR c++/85110
> > * g++.dg/diagnostic/param-type-mismatch-2.C: Update for the
> > cases
> > where we now show the pertinent parameter.
> > ---
> >  gcc/cp/call.c   |  2 +-
> >  gcc/cp/cp-tree.h|  2 ++
> >  gcc/cp/typeck.c | 10 +++
> > ---
> >  .../g++.dg/diagnostic/param-type-mismatch-2.C   | 21
> > ++---
> >  4 files changed, 28 insertions(+), 7 deletions(-)
> > 
> > diff --git a/gcc/cp/call.c b/gcc/cp/call.c
> > index 1a87f99..e1a0639 100644
> > --- a/gcc/cp/call.c
> > +++ b/gcc/cp/call.c
> > @@ -6598,7 +6598,7 @@ maybe_print_user_conv_context (conversion
> > *convs)
> > ARGNUM is zero based, -1 indicates the `this' argument of a
> > method.
> > Return the location of the FNDECL itself if there are
> > problems.  */
> > 
> > -static location_t
> > +location_t
> >  get_fndecl_argument_location (tree fndecl, int argnum)
> >  {
> >int i;
> > diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
> > index d5382c2..b45880d 100644
> > --- a/gcc/cp/cp-tree.h
> > +++ b/gcc/cp/cp-tree.h
> > @@ -5965,6 +5965,8 @@ extern bool
> > can_convert_arg   (tree, tree, tree, int,
> >  tsubst_flags_t);
> >  extern bool can_convert_arg_bad(tree,
> > tree, tree, int,
> >  tsubst_flags_t);
> > +extern location_t get_fndecl_argument_location  (tree, int);
> > +
> > 
> >  /* A class for recording information about access failures (e.g.
> > private
> > fields), so that we can potentially supply a fix-it hint about
> > diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
> > index e733c79..742b2e9 100644
> > --- a/gcc/cp/typeck.c
> > +++ b/gcc/cp/typeck.c
> > @@ -8781,9 +8781,13 @@ convert_for_assignment (tree type, tree rhs,
> >parmnum,
> > complain, flags);
> > }
> >   else if (fndecl)
> > -   error_at (EXPR_LOC_OR_LOC (rhs, input_location),
> > - "cannot convert %qH to %qI for argument
> > %qP to %qD",
> > - rhstype, type, parmnum, fndecl);
> > +   {
> > + error_at (EXPR_LOC_OR_LOC (rhs, input_location),
> > +   "cannot convert %qH to %qI for argument
> > %qP to %qD",
> > +   rhstype, type, parmnum, fndecl);
> > + inform (get_fndecl_argument_location (fndecl,
> > parmnum),
> > + "  initializing argument %P of %qD",
> > parmnum, fndecl);
> 
> If you're adding the inform, you don't need the %P of %D in the
> initial error.
> 
> Jason

Thanks.  Here's an updated version of the patch which removes them.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu; adds
18 PASS results to g++.sum.

OK for trunk?

gcc/cp/ChangeLog:
PR c++/85110
* call.c (get_fndecl_argument_location): Make non-static.
* cp-tree.h (get_fndecl_argument_location): New decl.
* typeck.c (convert_for_assignment): When complaining due to
conversions for an argument, show the location of the parameter
within the decl.

gcc/testsuite/Change

Re: [AARCH64] Neon vld1__x3, vst1__x2 and vst1_*_x3 intrinsics

2018-04-06 Thread Christophe Lyon

Hi,

2018-04-06 12:15 GMT+02:00 Sameera Deshpande :
> Hi Christophe,
>
> Please find attached the updated patch with testcases.
>
> Ok for trunk?

Thanks for the update.

Since the new intrinsics are only available on aarch64, you want to
prevent the tests from running on arm.
Indeed gcc.target/aarch64/advsimd-intrinsics/ is shared between the two targets.
There are several examples on how to do that in that directory.

I have also noticed that the tests fail at execution on aarch64_be.

I didn't look at the patch in details.

Christophe


>
> - Thanks and regards,
>   Sameera D.
>
> 2017-12-14 22:17 GMT+05:30 Christophe Lyon :
>> 2017-12-14 9:29 GMT+01:00 Sameera Deshpande :
>>> Hi!
>>>
>>> Please find attached the patch implementing vld1_*_x3, vst1_*_x2 and
>>> vst1_*_x3 intrinsics as defined by Neon document.
>>>
>>> Ok for trunk?
>>>
>>> - Thanks and regards,
>>>   Sameera D.
>>>
>>> gcc/Changelog:
>>>
>>> 2017-11-14  Sameera Deshpande  
>>>
>>>
>>> * config/aarch64/aarch64-simd-builtins.def (ld1x3): New.
>>> (st1x2): Likewise.
>>> (st1x3): Likewise.
>>> * config/aarch64/aarch64-simd.md
>>> (aarch64_ld1x3): New pattern.
>>> (aarch64_ld1_x3_): Likewise
>>> (aarch64_st1x2): Likewise
>>> (aarch64_st1_x2_): Likewise
>>> (aarch64_st1x3): Likewise
>>> (aarch64_st1_x3_): Likewise
>>> * config/aarch64/arm_neon.h (vld1_u8_x3): New function.
>>> (vld1_s8_x3): Likewise.
>>> (vld1_u16_x3): Likewise.
>>> (vld1_s16_x3): Likewise.
>>> (vld1_u32_x3): Likewise.
>>> (vld1_s32_x3): Likewise.
>>> (vld1_u64_x3): Likewise.
>>> (vld1_s64_x3): Likewise.
>>> (vld1_fp16_x3): Likewise.
>>> (vld1_f32_x3): Likewise.
>>> (vld1_f64_x3): Likewise.
>>> (vld1_p8_x3): Likewise.
>>> (vld1_p16_x3): Likewise.
>>> (vld1_p64_x3): Likewise.
>>> (vld1q_u8_x3): Likewise.
>>> (vld1q_s8_x3): Likewise.
>>> (vld1q_u16_x3): Likewise.
>>> (vld1q_s16_x3): Likewise.
>>> (vld1q_u32_x3): Likewise.
>>> (vld1q_s32_x3): Likewise.
>>> (vld1q_u64_x3): Likewise.
>>> (vld1q_s64_x3): Likewise.
>>> (vld1q_f16_x3): Likewise.
>>> (vld1q_f32_x3): Likewise.
>>> (vld1q_f64_x3): Likewise.
>>> (vld1q_p8_x3): Likewise.
>>> (vld1q_p16_x3): Likewise.
>>> (vld1q_p64_x3): Likewise.
>>> (vst1_s64_x2): Likewise.
>>> (vst1_u64_x2): Likewise.
>>> (vst1_f64_x2): 
>>> Likewise.patchurl=http://people.linaro.org/~christophe.lyon/armv8_2-fp16-scalar-2.patch3
patchname=armv8_2-fp16-scalar-2.patch3
refrev=259064
email_to=christophe.l...@linaro.org

>>> (vst1_s8_x2): Likewise.
>>> (vst1_p8_x2): Likewise.
>>> (vst1_s16_x2): Likewise.
>>> (vst1_p16_x2): Likewise.
>>> (vst1_s32_x2): Likewise.
>>> (vst1_u8_x2): Likewise.
>>> (vst1_u16_x2): Likewise.
>>> (vst1_u32_x2): Likewise.
>>> (vst1_f16_x2): Likewise.
>>> (vst1_f32_x2): Likewise.
>>> (vst1_p64_x2): Likewise.
>>> (vst1q_s8_x2): Likewise.
>>> (vst1q_p8_x2): Likewise.
>>> (vst1q_s16_x2): Likewise.
>>> (vst1q_p16_x2): Likewise.
>>> (vst1q_s32_x2): Likewise.
>>> (vst1q_s64_x2): Likewise.
>>> (vst1q_u8_x2): Likewise.
>>> (vst1q_u16_x2): Likewise.
>>> (vst1q_u32_x2): Likewise.
>>> (vst1q_u64_x2): Likewise.
>>> (vst1q_f16_x2): Likewise.
>>> (vst1q_f32_x2): Likewise.
>>> (vst1q_f64_x2): Likewise.
>>> (vst1q_p64_x2): Likewise.
>>> (vst1_s64_x3): Likewise.
>>> (vst1_u64_x3): Likewise.
>>> (vst1_f64_x3): Likewise.
>>> (vst1_s8_x3): Likewise.
>>> (vst1_p8_x3): Likewise.
>>> (vst1_s16_x3): Likewise.
>>> (vst1_p16_x3): Likewise.
>>> (vst1_s32_x3): Likewise.
>>> (vst1_u8_x3): Likewise.
>>> (vst1_u16_x3): Likewise.
>>> (vst1_u32_x3): Likewise.
>>> (vst1_f16_x3): Likewise.
>>> (vst1_f32_x3): Likewise.
>>> (vst1_p64_x3): Likewise.
>>> (vst1q_s8_x3): Likewise.
>>> (vst1q_p8_x3): Likewise.
>>> (vst1q_s16_x3): Likewise.
>>> (vst1q_p16_x3): Likewise.
>>> (vst1q_s32_x3): Likewise.
>>> (vst1q_s64_x3): Likewise.
>>> (vst1q_u8_x3): Likewise.
>>> (vst1q_u16_x3): Likewise.
>>> (vst1q_u32_x3): Likewise.
>>> (vst1q_u64_x3): Likewise.
>>> (vst1q_f16_x3): Likewise.
>>> (vst1q_f32_x3): Likewise.
>>> (vst1q_f64_x3): Likewise.
>>> (vst1q_p64_x3): Likewise.
>>
>> Hi,
>> I'm not a maintainer, but I suspect you should add some tests.
>>
>> Christophe
>
>
>
> --
> - Thanks and regards,
>   Sameera D.

[PATCH] config.gcc (x86_64--rtems): Add rtems.h to tm_file

2018-04-06 Thread Amaan Cheval

Hi!

All the gcc targets for RTEMS include gcc/config/rtems.h in tm_file to add
specific linker options using LIB_SPEC.

This patch simply intends to add the same to the x86_64 target.

There are no tests in this patch because I don't see any tests for any of the
other RTEMS targets - let me know if you'd be interested in a patch for that,
and I can look into adding general tests for all the RTEMS targets or just
specific ones that _must_ support these switches - Joel and Sebastian may be
able to shed light on which it should be, if any.

P.S. - I've also added this patch to rtems-source-builder and built gcc to
verify that it works (in that the new switches do not throw "unrecognized
command line option" errors anymore, at least). Let me know if you'd like a
patch to test with rtems-source-builder, if that makes it easier for you to
verify.

Thanks!

gcc/ChangeLog:

2018-04-07  Amaan Cheval  

* config.gcc (x86_64-*-rtems*): Add rtems.h to tm_file for
custom LIB_SPEC setup.

Index: gcc/config.gcc
===
--- gcc/config.gcc  (revision 259188)
+++ gcc/config.gcc  (working copy)
@@ -1496,7 +1496,7 @@ x86_64-*-elf*)
tm_file="${tm_file} i386/unix.h i386/att.h dbxelf.h elfos.h newlib-stdint.h 
i386/i386elf.h i386/x86-64.h"
;;
 x86_64-*-rtems*)
-   tm_file="${tm_file} i386/unix.h i386/att.h dbxelf.h elfos.h 
newlib-stdint.h i386/i386elf.h i386/x86-64.h i386/rtemself.h"
+   tm_file="${tm_file} i386/unix.h i386/att.h dbxelf.h elfos.h 
newlib-stdint.h i386/i386elf.h i386/x86-64.h i386/rtemself.h rtems.h"
;;
 i[34567]86-*-rdos*)
 tm_file="${tm_file} i386/unix.h i386/att.h dbxelf.h elfos.h 
newlib-stdint.h i386/i386elf.h i386/rdos.h"

Re: [PATCH v2] RISC-V: Support for FreeBSD

2018-04-06 Thread Jim Wilson

On Thu, Apr 5, 2018 at 8:48 PM, Kito Cheng  wrote:
> Theodore Teah sent an mail say "Your assignment/disclaimer process
> with the FSF is currently complete.".
>
> Could you help us to commit that?

Committed.  With some riscv{32,64}-{elf,linux,freebsd} cross compiler
build tests, to make sure the patch is still OK with current sources.

I also noticed that Nathan Sidwell accidentally truncated the
copyright message at the end of the gcc/ChangeLog file back on March
21, and I committed a fix for that too.

Jim

[PATCH] Fix native_encode_expr and sccvn caller (PR tree-optimization/85257)

2018-04-06 Thread Jakub Jelinek

Hi!

On the following testcase, we try to read from a huge VECTOR_CST that
doesn't fit into 64 bytes and read completely random number out of it.

The issue is that native_encode_expr has 2 modes of operation, when
called with 3 arguments, it is supposed to encode the whole object or
nothing (i.e. return 0 on failure or the whole size on success), and
when called with 4 arguments, it can encode just a portion thereof (is given
offset at which to start and returns the actually encoded length from that
spot, which can be smaller than the whole object's size).

sccvn was using the first mode, unfortunately native_encode_vector had a bug
where it the length happened to be exactly on the boundary between two
VECTOR_CST elements, it could return smaller len (thus surprising callers
which assumed 0 or everything).  This is fixed by the first hunk.

Though, in sccvn case, using the 3 argument native_encode_expr is
unnecessary, we know the offset and size we want to interpret from it,
so the second 2 hunks optimize it; this way, we can read even from the
256-byte long vector with just 64-byte buffer and optimize the testcase.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2018-04-06  Jakub Jelinek  

PR tree-optimization/85257
* fold-const.c (native_encode_vector): If not all elts could fit
and off is -1, return 0 rather than offset.
* tree-ssa-sccvn.c (vn_reference_lookup_3): Pass
(offseti - offset2) / BITS_PER_UNIT as 4th argument to
native_encode_expr.  Verify len * BITS_PER_UNIT >= maxsizei.  Don't
adjust buffer in native_interpret_expr call.

* gcc.dg/pr85257.c: New test.

--- gcc/fold-const.c.jj 2018-04-06 13:23:30.622190581 +0200
+++ gcc/fold-const.c2018-04-06 17:00:52.810460085 +0200
@@ -7307,7 +7307,7 @@ native_encode_vector (const_tree expr, u
return 0;
   offset += res;
   if (offset >= len)
-   return offset;
+   return (off == -1 && i < count - 1) ? 0 : offset;
   if (off != -1)
off = 0;
 }
--- gcc/tree-ssa-sccvn.c.jj 2018-04-04 10:23:59.968294555 +0200
+++ gcc/tree-ssa-sccvn.c2018-04-06 17:07:44.633489528 +0200
@@ -2038,8 +2038,9 @@ vn_reference_lookup_3 (ao_ref *ref, tree
  if (TREE_CODE (rhs) == SSA_NAME)
rhs = SSA_VAL (rhs);
  len = native_encode_expr (gimple_assign_rhs1 (def_stmt),
-   buffer, sizeof (buffer));
- if (len > 0)
+   buffer, sizeof (buffer),
+   (offseti - offset2) / BITS_PER_UNIT);
+ if (len > 0 && len * BITS_PER_UNIT >= maxsizei)
{
  tree type = vr->type;
  /* Make sure to interpret in a type that has a range
@@ -2048,10 +2049,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree
  && maxsizei != TYPE_PRECISION (vr->type))
type = build_nonstandard_integer_type (maxsizei,
   TYPE_UNSIGNED (type));
- tree val = native_interpret_expr (type,
-   buffer
-   + ((offseti - offset2)
-  / BITS_PER_UNIT),
+ tree val = native_interpret_expr (type, buffer,
maxsizei / BITS_PER_UNIT);
  /* If we chop off bits because the types precision doesn't
 match the memory access size this is ok when optimizing
--- gcc/testsuite/gcc.dg/pr85257.c.jj   2018-04-06 17:10:42.710500305 +0200
+++ gcc/testsuite/gcc.dg/pr85257.c  2018-04-06 17:10:11.621498423 +0200
@@ -0,0 +1,20 @@
+/* PR tree-optimization/85257 */
+/* { dg-do run { target int128 } } */
+/* { dg-options "-O2 -fno-tree-ccp" } */
+
+typedef __int128 V __attribute__ ((__vector_size__ (16 * sizeof (__int128;
+
+__int128 __attribute__ ((noipa))
+foo (void)
+{
+  V v = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+  return v[5];
+}
+
+int
+main ()
+{
+  if (foo () != 6)
+__builtin_abort ();
+  return 0;
+}

Jakub

Re: [PATCH] config.gcc (x86_64--rtems): Add rtems.h to tm_file

2018-04-06 Thread Joel Sherrill

Thanks for submitting the patch. This patch is OK to merge to the
master and all open branches that have this target.

A corresponding patch for the RTEMS Source Builder is necessary
because a gcc release with this patch won't be available for a while.

I am starting a build with this now.  If Sebastian pushes it before me,
that's OK.

--joel

On Fri, Apr 6, 2018 at 3:05 PM, Amaan Cheval  wrote:

> Hi!
>
> All the gcc targets for RTEMS include gcc/config/rtems.h in tm_file to add
> specific linker options using LIB_SPEC.
>
> This patch simply intends to add the same to the x86_64 target.
>
> There are no tests in this patch because I don't see any tests for any of
> the
> other RTEMS targets - let me know if you'd be interested in a patch for
> that,
> and I can look into adding general tests for all the RTEMS targets or just
> specific ones that _must_ support these switches - Joel and Sebastian may
> be
> able to shed light on which it should be, if any.
>
> P.S. - I've also added this patch to rtems-source-builder and built gcc to
> verify that it works (in that the new switches do not throw "unrecognized
> command line option" errors anymore, at least). Let me know if you'd like a
> patch to test with rtems-source-builder, if that makes it easier for you to
> verify.
>
> Thanks!
>
> gcc/ChangeLog:
>
> 2018-04-07  Amaan Cheval  
>
> * config.gcc (x86_64-*-rtems*): Add rtems.h to tm_file for
> custom LIB_SPEC setup.
>
> Index: gcc/config.gcc
> ===
> --- gcc/config.gcc  (revision 259188)
> +++ gcc/config.gcc  (working copy)
> @@ -1496,7 +1496,7 @@ x86_64-*-elf*)
> tm_file="${tm_file} i386/unix.h i386/att.h dbxelf.h elfos.h
> newlib-stdint.h i386/i386elf.h i386/x86-64.h"
> ;;
>  x86_64-*-rtems*)
> -   tm_file="${tm_file} i386/unix.h i386/att.h dbxelf.h elfos.h
> newlib-stdint.h i386/i386elf.h i386/x86-64.h i386/rtemself.h"
> +   tm_file="${tm_file} i386/unix.h i386/att.h dbxelf.h elfos.h
> newlib-stdint.h i386/i386elf.h i386/x86-64.h i386/rtemself.h rtems.h"
> ;;
>  i[34567]86-*-rdos*)
>  tm_file="${tm_file} i386/unix.h i386/att.h dbxelf.h elfos.h
> newlib-stdint.h i386/i386elf.h i386/rdos.h"
>

Re: [PATCH] config.gcc (x86_64--rtems): Add rtems.h to tm_file

2018-04-06 Thread Amaan Cheval

On Sat, Apr 7, 2018 at 1:49 AM Joel Sherrill  wrote:

> Thanks for submitting the patch. This patch is OK to merge to the
> master and all open branches that have this target.

> A corresponding patch for the RTEMS Source Builder is necessary
> because a gcc release with this patch won't be available for a while.

Yep, I figured it'd be the same style as we have for some of the other
patches that refer directly to the gcc.git commit:
https://git.rtems.org/rtems-source-builder/tree/rtems/config/tools/rtems-gcc-7.2.0-newlib-2.5.0.20170922-1.cfg#n24

1. Is that right? Or would we rather commit the patch into
rtems-source-builder and use "--rsb-file"? I imagine it doesn't really make
a difference, so I'll just go with the former if you don't have a
preference.
2. Would only the "rtems-gcc-7.3.0-newlib-3.0.0.cfg" file need the patch
(since it seems to be what the rtems-source-builder uses for the
"5/rtems-x86_64" buildset[1][2]), or would there be more? (I don't see the
need to support older versions since the port won't exist for a while, so
the tools don't need to be updated backwards either.)

[1]
https://git.rtems.org/rtems-source-builder/tree/rtems/config/5/rtems-x86_64.bset#n4
[2]
https://git.rtems.org/rtems-source-builder/tree/rtems/config/5/rtems-default.bset#n14


> I am starting a build with this now.  If Sebastian pushes it before me,
> that's OK.

> --joel

> On Fri, Apr 6, 2018 at 3:05 PM, Amaan Cheval 
wrote:

>> Hi!

>> All the gcc targets for RTEMS include gcc/config/rtems.h in tm_file to
add
>> specific linker options using LIB_SPEC.

>> This patch simply intends to add the same to the x86_64 target.

>> There are no tests in this patch because I don't see any tests for any
of the
>> other RTEMS targets - let me know if you'd be interested in a patch for
that,
>> and I can look into adding general tests for all the RTEMS targets or
just
>> specific ones that _must_ support these switches - Joel and Sebastian
may be
>> able to shed light on which it should be, if any.

>> P.S. - I've also added this patch to rtems-source-builder and built gcc
to
>> verify that it works (in that the new switches do not throw "unrecognized
>> command line option" errors anymore, at least). Let me know if you'd
like a
>> patch to test with rtems-source-builder, if that makes it easier for you
to
>> verify.

>> Thanks!

>> gcc/ChangeLog:

>> 2018-04-07  Amaan Cheval  

>>  * config.gcc (x86_64-*-rtems*): Add rtems.h to tm_file for
>>  custom LIB_SPEC setup.

>> Index: gcc/config.gcc
>> ===
>> --- gcc/config.gcc  (revision 259188)
>> +++ gcc/config.gcc  (working copy)
>> @@ -1496,7 +1496,7 @@ x86_64-*-elf*)
>>  tm_file="${tm_file} i386/unix.h i386/att.h dbxelf.h elfos.h
newlib-stdint.h i386/i386elf.h i386/x86-64.h"
>>  ;;
>>   x86_64-*-rtems*)
>> -   tm_file="${tm_file} i386/unix.h i386/att.h dbxelf.h elfos.h
newlib-stdint.h i386/i386elf.h i386/x86-64.h i386/rtemself.h"
>> +   tm_file="${tm_file} i386/unix.h i386/att.h dbxelf.h elfos.h
newlib-stdint.h i386/i386elf.h i386/x86-64.h i386/rtemself.h rtems.h"
>>  ;;
>>   i[34567]86-*-rdos*)
>>   tm_file="${tm_file} i386/unix.h i386/att.h dbxelf.h elfos.h
newlib-stdint.h i386/i386elf.h i386/rdos.h"

New German PO file for 'gcc' (version 8.1-b20180401)

2018-04-06 Thread Translation Project Robot

Hello, gentle maintainer.

This is a message from the Translation Project robot.

A revised PO file for textual domain 'gcc' has been submitted
by the German team of translators.  The file is available at:

http://translationproject.org/latest/gcc/de.po

(This file, 'gcc-8.1-b20180401.de.po', has just now been sent to you in
a separate email.)

All other PO files for your package are available in:

http://translationproject.org/latest/gcc/

Please consider including all of these in your next release, whether
official or a pretest.

Whenever you have a new distribution with a new version number ready,
containing a newer POT file, please send the URL of that distribution
tarball to the address below.  The tarball may be just a pretest or a
snapshot, it does not even have to compile.  It is just used by the
translators when they need some extra translation context.

The following HTML page has been updated:

http://translationproject.org/domain/gcc.html

If any question arises, please contact the translation coordinator.

Thank you for all your work,

The Translation Project robot, in the
name of your translation coordinator.

[SPARC] Fix PR target/85196

2018-04-06 Thread Eric Botcazou

This fixes a regression present on the 7 and 6 branches in the form of an ICE 
on an unrecognizable insn generated for a jump table in PIC mode.  It occurs 
because the index of the table is folded to a constant at RTL expansion time 
and not at the GIMPLE level (this pessimization is fixed on the mainline).

Tested on SPARC64/Linux, applied on all active branches.


2018-04-06  Eric Botcazou  

PR target/85196
* config/sparc/sparc.c (sparc_expand_move): Deal with symbolic operands
based on LABEL_REF.  Remove useless assertion.
(pic_address_needs_scratch): Fix formatting.
(sparc_legitimize_pic_address): Minor tweaks.
(sparc_delegitimize_address): Adjust assertion accordingly.
* config/sparc/sparc.md (movsi_pic_label_ref): Change label_ref_operand
into symbolic_operand.
(movsi_high_pic_label_ref): Likewise.
(movsi_lo_sum_pic_label_ref): Likewise.
(movdi_pic_label_ref): Likewise.
(movdi_high_pic_label_ref): Likewise.
(movdi_lo_sum_pic_label_ref): Likewise.


2018-04-06  Eric Botcazou  

* g++.dg/opt/pr85196.C: New test.

-- 
Eric BotcazouIndex: config/sparc/sparc.c
===
--- config/sparc/sparc.c	(revision 259025)
+++ config/sparc/sparc.c	(working copy)
@@ -2236,7 +2236,7 @@ sparc_expand_move (machine_mode mode, rt
 	}
 }
 
-  /* Fixup TLS cases.  */
+  /* Fix up TLS cases.  */
   if (TARGET_HAVE_TLS
   && CONSTANT_P (operands[1])
   && sparc_tls_referenced_p (operands [1]))
@@ -2245,15 +2245,20 @@ sparc_expand_move (machine_mode mode, rt
   return false;
 }
 
-  /* Fixup PIC cases.  */
+  /* Fix up PIC cases.  */
   if (flag_pic && CONSTANT_P (operands[1]))
 {
   if (pic_address_needs_scratch (operands[1]))
 	operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
 
   /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases.  */
-  if (GET_CODE (operands[1]) == LABEL_REF
-	  && can_use_mov_pic_label_ref (operands[1]))
+  if ((GET_CODE (operands[1]) == LABEL_REF
+	   && can_use_mov_pic_label_ref (operands[1]))
+	  || (GET_CODE (operands[1]) == CONST
+	  && GET_CODE (XEXP (operands[1], 0)) == PLUS
+	  && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
+	  && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT
+	  && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0
 	{
 	  if (mode == SImode)
 	{
@@ -2263,7 +2268,6 @@ sparc_expand_move (machine_mode mode, rt
 
 	  if (mode == DImode)
 	{
-	  gcc_assert (TARGET_ARCH64);
 	  emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
 	  return true;
 	}
@@ -4280,10 +4284,11 @@ int
 pic_address_needs_scratch (rtx x)
 {
   /* An address which is a symbolic plus a non SMALL_INT needs a temp reg.  */
-  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
+  if (GET_CODE (x) == CONST
+  && GET_CODE (XEXP (x, 0)) == PLUS
   && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
-  && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
+  && !SMALL_INT (XEXP (XEXP (x, 0), 1)))
 return 1;
 
   return 0;
@@ -4750,16 +4755,15 @@ sparc_legitimize_tls_address (rtx addr)
 static rtx
 sparc_legitimize_pic_address (rtx orig, rtx reg)
 {
-  bool gotdata_op = false;
-
   if (GET_CODE (orig) == SYMBOL_REF
   /* See the comment in sparc_expand_move.  */
   || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
 {
+  bool gotdata_op = false;
   rtx pic_ref, address;
   rtx_insn *insn;
 
-  if (reg == 0)
+  if (!reg)
 	{
 	  gcc_assert (can_create_pseudo_p ());
 	  reg = gen_reg_rtx (Pmode);
@@ -4770,8 +4774,7 @@ sparc_legitimize_pic_address (rtx orig,
 	  /* If not during reload, allocate another temp reg here for loading
 	 in the address, so that these instructions can be optimized
 	 properly.  */
-	  rtx temp_reg = (! can_create_pseudo_p ()
-			  ? reg : gen_reg_rtx (Pmode));
+	  rtx temp_reg = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : reg;
 
 	  /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
 	 won't get confused into thinking that these two instructions
@@ -4787,6 +4790,7 @@ sparc_legitimize_pic_address (rtx orig,
 	  emit_insn (gen_movsi_high_pic (temp_reg, orig));
 	  emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
 	}
+
 	  address = temp_reg;
 	  gotdata_op = true;
 	}
@@ -4827,7 +4831,7 @@ sparc_legitimize_pic_address (rtx orig,
 	  && sparc_pic_register_p (XEXP (XEXP (orig, 0), 0)))
 	return orig;
 
-  if (reg == 0)
+  if (!reg)
 	{
 	  gcc_assert (can_create_pseudo_p ());
 	  reg = gen_reg_rtx (Pmode);
@@ -4935,7 +4939,11 @@ sparc_delegitimize_address (rtx x)
   && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
 {
   x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
-

Re: [PATCH] Fix native_encode_expr and sccvn caller (PR tree-optimization/85257)

2018-04-06 Thread Richard Biener

On April 6, 2018 10:14:37 PM GMT+02:00, Jakub Jelinek  wrote:
>Hi!
>
>On the following testcase, we try to read from a huge VECTOR_CST that
>doesn't fit into 64 bytes and read completely random number out of it.
>
>The issue is that native_encode_expr has 2 modes of operation, when
>called with 3 arguments, it is supposed to encode the whole object or
>nothing (i.e. return 0 on failure or the whole size on success), and
>when called with 4 arguments, it can encode just a portion thereof (is
>given
>offset at which to start and returns the actually encoded length from
>that
>spot, which can be smaller than the whole object's size).
>
>sccvn was using the first mode, unfortunately native_encode_vector had
>a bug
>where it the length happened to be exactly on the boundary between two
>VECTOR_CST elements, it could return smaller len (thus surprising
>callers
>which assumed 0 or everything).  This is fixed by the first hunk.
>
>Though, in sccvn case, using the 3 argument native_encode_expr is
>unnecessary, we know the offset and size we want to interpret from it,
>so the second 2 hunks optimize it; this way, we can read even from the
>256-byte long vector with just 64-byte buffer and optimize the
>testcase.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK. 

Richard. 

>2018-04-06  Jakub Jelinek  
>
>   PR tree-optimization/85257
>   * fold-const.c (native_encode_vector): If not all elts could fit
>   and off is -1, return 0 rather than offset.
>   * tree-ssa-sccvn.c (vn_reference_lookup_3): Pass
>   (offseti - offset2) / BITS_PER_UNIT as 4th argument to
>   native_encode_expr.  Verify len * BITS_PER_UNIT >= maxsizei.  Don't
>   adjust buffer in native_interpret_expr call.
>
>   * gcc.dg/pr85257.c: New test.
>
>--- gcc/fold-const.c.jj2018-04-06 13:23:30.622190581 +0200
>+++ gcc/fold-const.c   2018-04-06 17:00:52.810460085 +0200
>@@ -7307,7 +7307,7 @@ native_encode_vector (const_tree expr, u
>   return 0;
>   offset += res;
>   if (offset >= len)
>-  return offset;
>+  return (off == -1 && i < count - 1) ? 0 : offset;
>   if (off != -1)
>   off = 0;
> }
>--- gcc/tree-ssa-sccvn.c.jj2018-04-04 10:23:59.968294555 +0200
>+++ gcc/tree-ssa-sccvn.c   2018-04-06 17:07:44.633489528 +0200
>@@ -2038,8 +2038,9 @@ vn_reference_lookup_3 (ao_ref *ref, tree
> if (TREE_CODE (rhs) == SSA_NAME)
>   rhs = SSA_VAL (rhs);
> len = native_encode_expr (gimple_assign_rhs1 (def_stmt),
>-  buffer, sizeof (buffer));
>-if (len > 0)
>+  buffer, sizeof (buffer),
>+  (offseti - offset2) / BITS_PER_UNIT);
>+if (len > 0 && len * BITS_PER_UNIT >= maxsizei)
>   {
> tree type = vr->type;
> /* Make sure to interpret in a type that has a range
>@@ -2048,10 +2049,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree
> && maxsizei != TYPE_PRECISION (vr->type))
>   type = build_nonstandard_integer_type (maxsizei,
>  TYPE_UNSIGNED (type));
>-tree val = native_interpret_expr (type,
>-  buffer
>-  + ((offseti - offset2)
>- / BITS_PER_UNIT),
>+tree val = native_interpret_expr (type, buffer,
>   maxsizei / BITS_PER_UNIT);
> /* If we chop off bits because the types precision doesn't
>match the memory access size this is ok when optimizing
>--- gcc/testsuite/gcc.dg/pr85257.c.jj  2018-04-06 17:10:42.710500305
>+0200
>+++ gcc/testsuite/gcc.dg/pr85257.c 2018-04-06 17:10:11.621498423 +0200
>@@ -0,0 +1,20 @@
>+/* PR tree-optimization/85257 */
>+/* { dg-do run { target int128 } } */
>+/* { dg-options "-O2 -fno-tree-ccp" } */
>+
>+typedef __int128 V __attribute__ ((__vector_size__ (16 * sizeof
>(__int128;
>+
>+__int128 __attribute__ ((noipa))
>+foo (void)
>+{
>+  V v = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
>+  return v[5];
>+}
>+
>+int
>+main ()
>+{
>+  if (foo () != 6)
>+__builtin_abort ();
>+  return 0;
>+}
>
>   Jakub

63 matches

Mail list logo