[PATCH] S/390: Implement vectory copysign

2019-02-07 Thread Robin Dapp
Hi,

this patch implements vector copysign using vector select on S/390.

Regtested and bootstrapped on s390x.

Regards
 Robin

--

gcc/ChangeLog:

2019-02-07  Robin Dapp  

* config/s390/vector.md: Implement vector copysign.

gcc/testsuite/ChangeLog:

2019-02-07  Robin Dapp  

* gcc.target/s390/vector/vec-copysign-execute.c: New test.
* gcc.target/s390/vector/vec-copysign.c: New test.
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index c9ffab4c8c2..820372eca29 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -1362,6 +1362,31 @@
   operands[4] = CONST0_RTX (V2DImode);
 })
 
+; Vector copysign, implement using vector select
+(define_expand "copysign3"
+  [(set (match_operand:VFT 0 "register_operand" "")
+	(if_then_else:VFT
+	 (eq (match_dup 3)
+	 (match_dup 4))
+	 (match_operand:VFT 1 "register_operand"  "")
+	 (match_operand:VFT 2 "register_operand"  "")))]
+  "TARGET_VX"
+{
+  int sz = GET_MODE_BITSIZE (GET_MODE_INNER (mode));
+  int prec = GET_MODE_PRECISION (GET_MODE_INNER (mode));
+  wide_int mask_val = wi::shwi (1l << (sz - 1), prec);
+
+  rtx mask = gen_reg_rtx (mode);
+
+  int nunits = GET_MODE_NUNITS (mode);
+  rtvec v = rtvec_alloc (nunits);
+  for (int i = 0; i < nunits; i++)
+RTVEC_ELT (v, i) = GEN_INT (mask_val.to_shwi ());
+
+  mask = gen_rtx_CONST_VECTOR (mode, v);
+  operands[3] = force_reg (mode, mask);
+  operands[4] = CONST0_RTX (mode);
+})
 
 ;;
 ;; Integer compares
diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-copysign-execute.c b/gcc/testsuite/gcc.target/s390/vector/vec-copysign-execute.c
new file mode 100644
index 000..a8d675d3a72
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/vec-copysign-execute.c
@@ -0,0 +1,74 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -mzarch -march=z13" } */
+
+#include 
+#include 
+
+#define N 20
+
+double a[N] = {-0.1, -3.2, -6.3, -9.4, -12.5, -15.6, -18.7, -21.8, 24.9,
+27.1, 30.2, 33.3, 36.4, 39.5, 42.6, nan("123"), __DBL_MIN__ / 2.0,
+-nan ("1"), __DBL_MAX__ * 2.0, -__DBL_MAX__ * 1e199};
+double b[N] = {-1.2, 3.4, -5.6, 7.8, -9.0, 1.0, -2.0, 3.0, -4.0, -5.0, 6.0,
+7.0, -8.0, -9.0, 10.0, -11.0, -1., 0., -0., 1.3};
+double r[N];
+double r2[N];
+
+void
+foo (void)
+{
+  for (int i = 0; i < N; i++)
+r[i] = copysign (a[i], b[i]);
+}
+
+__attribute__((optimize("no-tree-vectorize")))
+void
+check (void)
+{
+  for (int i = 0; i < N; i++)
+{
+  r2[i] = copysign (a[i], b[i]);
+  assert (r[i] == r2[i]
+	  || (isnan (r[i]) && isnan (r2[i])
+		  && signbit (r[i]) == signbit (r2[i])));
+}
+}
+
+float af[N] = {-0.1, -3.2, -6.3, -9.4, -12.5, -15.6, -18.7, -21.8, 24.9,
+27.1, 30.2, 33.3, 36.4, 39.5, 42.6, nan("123"), __DBL_MIN__ / 2.0,
+-nan ("1"), __DBL_MAX__ * 2.0, -__DBL_MAX__ * 1e199};
+float bf[N] = {-1.2, 3.4, -5.6, 7.8, -9.0, 1.0, -2.0, 3.0, -4.0, -5.0, 6.0,
+7.0, -8.0, -9.0, 10.0, -11.0, -1., 0., -0., 1.3};
+float rf[N];
+float rf2[N];
+
+__attribute__ ((__target__ ("arch=z14")))
+void
+foof (void)
+{
+  for (int i = 0; i < N; i++)
+rf[i] = copysignf (af[i], bf[i]);
+}
+
+__attribute__((optimize("no-tree-vectorize")))
+void
+checkf (void)
+{
+  for (int i = 0; i < N; i++)
+{
+  rf2[i] = copysignf (af[i], bf[i]);
+  assert (rf[i] == rf2[i]
+	  || (isnan (rf[i]) && isnan (rf2[i])
+		  && signbit (rf[i]) == signbit (rf2[i])));
+}
+}
+
+int main()
+{
+  foo ();
+  check ();
+
+  foof ();
+  checkf ();
+  return r[0];
+}
diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-copysign.c b/gcc/testsuite/gcc.target/s390/vector/vec-copysign.c
new file mode 100644
index 000..64c6970c23e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/vec-copysign.c
@@ -0,0 +1,38 @@
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O2 -ftree-vectorize -mzarch" } */
+/* { dg-final { scan-assembler-times "vgmg" 1 } } */
+/* { dg-final { scan-assembler-times "vgmf" 1 } } */
+/* { dg-final { scan-assembler-times "vsel" 2 } } */
+
+#include 
+
+#define N 20
+
+double a[N] = {-0.1, -3.2, -6.3, -9.4, -12.5, -15.6, -18.7, -21.8, 24.9,
+27.1, 30.2, 33.3, 36.4, 39.5, 42.6, nan ("123"), __DBL_MIN__ / 2.0,
+-nan ("1"), __DBL_MAX__ * 2.0, -__DBL_MAX__ * 1e199};
+double b[N] = {-1.2, 3.4, -5.6, 7.8, -9.0, 1.0, -2.0, 3.0, -4.0, -5.0, 6.0,
+7.0, -8.0, -9.0, 10.0, -11.0, -1., 0., -0., 1.3};
+double r[N];
+float af[N] = {-0.1, -3.2, -6.3, -9.4, -12.5, -15.6, -18.7, -21.8, 24.9,
+27.1, 30.2, 33.3, 36.4, 39.5, 42.6, nan ("123"), __DBL_MIN__ / 2.0,
+-nan ("1"), __DBL_MAX__ * 2.0, -__DBL_MAX__ * 1e199};
+float bf[N] = {-1.2, 3.4, -5.6, 7.8, -9.0, 1.0, -2.0, 3.0, -4.0, -5.0, 6.0,
+7.0, -8.0, -9.0, 10.0, -11.0, -1., 0., -0., 1.3};
+float rf[N];
+
+__attribute__ ((__target__ ("arch=z13")))
+void
+foo (void)
+{
+  for (int i = 0; i < N; i++)
+r[i] = copysign (a[i], b[i]);
+}
+
+__attribute__ ((__target__ ("arch=z14")))
+void
+foof (void)
+{
+  for (int i = 0; i < N;

[PATCH][arm] Use neon_dot_q type for 128-bit V[US]DOT instructions where appropriate

2019-02-07 Thread Kyrill Tkachov

Hi all,

For the Dot Product instructions we have the scheduling types neon_dot and 
neon_dot_q for the 128-bit versions.
It seems that we're only using the former though, not assigning the neon_dot_q 
type anywhere.

This patch fixes that by adding the  mode attribute suffix to the type, 
similar to how we do it for other
types in neon.md.

Bootstrapped and tested on arm-none-linux-gnueabihf.

Committing to trunk.

Thanks,
Kyrill

2019-07-02  Kyrylo Tkachov  

* config/arm/neon.md (neon_dot): Use neon_dot for type.
(neon_dot_lane): Likewise.
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index f9d7ba35b137fed383f84eecbe81dd942943d216..4a2c7b99881e96fbff30a53d370ff0df1416c124 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -3542,7 +3542,7 @@ (define_insn "neon_dot"
 		DOTPROD)))]
   "TARGET_DOTPROD"
   "vdot.\\t%0, %2, %3"
-  [(set_attr "type" "neon_dot")]
+  [(set_attr "type" "neon_dot")]
 )
 
 ;; These instructions map to the __builtins for the Dot Product
@@ -3561,7 +3561,7 @@ (define_insn "neon_dot_lane"
   = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
 return "vdot.\\t%0, %2, %P3[%c4]";
   }
-  [(set_attr "type" "neon_dot")]
+  [(set_attr "type" "neon_dot")]
 )
 
 ;; These expands map to the Dot Product optab the vectorizer checks for.


Re: [REVISED PATCH 5/9]: C++ P0482R5 char8_t: Standard library support

2019-02-07 Thread Jonathan Wakely

On 23/12/18 21:27 -0500, Tom Honermann wrote:
Attached is a revised patch that addresses changes in P0482R6.  
Changes from the prior patch include:

- Updated the value of the __cpp_char8_t feature test macro to 201811.

Tested on x86_64-linux.


Thanks, Tom, this is great work!

The front-end changes for char8_t went in recently, and I'm finally
ready to commit the library parts. There's one big problem I found in
this patch, which is that the new numeric_limits
specialization uses constexpr unconditionally. That fails if 
is compiled using options like -std=c++98 -fno-char8_t because the
specialization will be used, but the constexpr keyword isn't allowed.
That's easily fixed by replacing the keyword with _GLIBCXX_CONSTEXPR.

The other way to solve that problem would be for the compiler to give
an error if -fchar8_t is used with C++98, but I see no fundamental
reason that combination of options shouldn't be allowed. We can
support it in the library by using the macro.

As discussed in San Diego, the other change needed is to add the
abi_tag attribute to the new versions of path::u8string and
path::generic_u8string, so that the mangling is different when its
return type is different:

#ifdef _GLIBCXX_USE_CHAR8_T
   __attribute__((__abi_tag__("__u8")))
   std::u8string  u8string() const;
#else
   std::stringu8string() const;
#endif // _GLIBCXX_USE_CHAR8_T

Otherwise we get ODR violations when linking objects compiled
with -fchar8_t enabled to objects with it disabled (e.g. linking
-std=c++17 objects to -std=c++2a objects, which needs to work).

I suggest "__u8" as the name of the ABI tag, but I'm open to other
suggestions. "__char8_t" is a bit long and verbose. "__cxx20" would be
consistent with "__cxx11" used for the new ABI introduced in GCC 5 but
it regularly confuses people who think it is coupled to the -std=c++11
option (and so don't understand why they still see it for -std=c++14).

Also, I see that you've made changes to  (to
add the experimental::u8string_view typedef) and to
std::experimental::path (to change the return type of u8string and
generic_u8string).

The former change is fairly harmless; it only adds a typedef, albeit
one which is not a reserved name in C++14/C++17 and so should be
available for users to define as a macro. Maybe prior to C++2a we
should only define it when GNU extensions are enabled (i.e. when using
-std=gnu++14 not -std=c++14):

#if defined _GLIBCXX_USE_CHAR8_T \
 && (__cplusplus > 201703L || !defined __STRICT_ANSI__)
 using u8string_view = basic_string_view;
#endif

Changing the return type of experimental::path members concerns me
more. That's a published TS which is not going to be revised, and it's
not obvious to me that users would want the change in semantics. If
somebody is still using the Filesystem TS in C++2a code, they're
probably not expecting it to change. If they need to update their code
for C++2a they might as well just use std::filesystem, and so having
char8_t support in std::experimental::filesystem isn't clearly useful.



Re: [PATCH] correct __clear_cache signature

2019-02-07 Thread Segher Boessenkool
Hi Martin,

On Wed, Feb 06, 2019 at 05:28:08PM -0700, Martin Sebor wrote:
>  void
> -__clear_cache (char *beg __attribute__((__unused__)),
> -char *end __attribute__((__unused__)))
> +__clear_cache (void *beg __attribute__((__unused__)),
> +void *end __attribute__((__unused__)))
>  {
>  #ifdef CLEAR_INSN_CACHE
> -  CLEAR_INSN_CACHE (beg, end);
> +  /* Cast the void* pointers to char* as some implementations
> + of the macro assume the pointers can be subtracted from
> + one another.  */
> +  CLEAR_INSN_CACHE ((char *) beg, (char *) end);
>  #endif /* CLEAR_INSN_CACHE */
>  }

You can subtract pointers to void in GCC just fine, it's a GNU C extension.
See extend.texi:

  In GNU C, addition and subtraction operations are supported on pointers to
  @code{void} and on pointers to functions.  This is done by treating the
  size of a @code{void} or of a function as 1.

(and libgcc is built with GCC always).


Segher


Re: [PATCH][AArch64] Change representation of SABD in RTL

2019-02-07 Thread Kyrill Tkachov

Hi James,

On 06/02/19 17:33, James Greenhalgh wrote:

On Mon, Feb 04, 2019 at 04:23:32AM -0600, Kyrill Tkachov wrote:

Hi all,

Richard raised a concern about the RTL we use to represent the AdvSIMD SABD
(vector signed absolute difference) instruction.
We currently represent it as ABS (MINUS op1 op2).

This isn't exactly what SABD does. ABS treats its input as a signed value
and returns the absolute of that.

For example:
(sabd:QI 64 -128) == 192 (unsigned) aka -64 (signed)
whereas
(minus:QI 64 -128) == 192 (unsigned) aka -64 (signed), (abs ...) of that is 64.

A better way to describe the instruction is with MINUS (SMAX (op1 op2) SMIN 
(op1 op2)).
This patch implements that, and also implements similar semantics for the UABD 
instruction
that uses UMAX and UMIN.

That way for the example above we'll have:
(minus:QI (smax:QI (64 -128)) (smin:QI (64 -128))) == (minus:QI 64 -128) == 192 
(or -64 signed) which matches
what SABD does.

Bootstrapped and tested on aarch64-none-linux-gnu.

Ok for trunk?

Not without a comment explaining the above subtlety and preferably a
testcase which would fail today on trunk.

Otherwise, OK.


I'll take that as approval with the changes requested.
I'll commit the updated patch (attached) later today.

Thanks,
Kyrill

2019-02-07  Kyrylo Tkachov  

* config/aarch64/iterators.md (max_opp): New code_attr.
(USMAX): New code iterator.
* config/aarch64/predicates.md (aarch64_smin): New predicate.
(aarch64_smax): Likewise.
* config/aarch64/aarch64-simd.md (abd_3): Rename to...
(*aarch64_abd_3): ... Change RTL representation to
MINUS (MAX MIN).

2019-02-07  Kyrylo Tkachov  

* gcc.target/aarch64/abd_1.c: New test.
* gcc.dg/sabd_1.c: Likewise.


James


Thanks,
Kyrill

2019-04-02  Kyrylo Tkachov  

  * config/aarch64/iterators.md (max_opp): New code_attr.
  (USMAX): New code iterator.
  * config/aarch64/predicates.md (aarch64_smin): New predicate.
  (aarch64_smax): Likewise.
  * config/aarch64/aarch64-simd.md (abd_3): Rename to...
  (*aarch64_abd_3): ... Change RTL representation to
  MINUS (MAX MIN).

2019-04-02  Kyrylo Tkachov  

  * gcc.target/aarch64/abd_1.c: New test.


diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 96360d877a404a6795aef5458345e87de9006cf4..eb99d3ab881e29f3069991e4f778be95d51ec4da 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -705,13 +705,22 @@ (define_insn "aarch64_abs"
   [(set_attr "type" "neon_abs")]
 )
 
-(define_insn "abd_3"
+;; It's tempting to represent SABD as ABS (MINUS op1 op2).
+;; This isn't accurate as ABS treats always its input as a signed value.
+;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
+;; Whereas SABD would return 192 (-64 signed) on the above example.
+;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
+(define_insn "*aarch64_abd_3"
   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
-	(abs:VDQ_BHSI (minus:VDQ_BHSI
-		   (match_operand:VDQ_BHSI 1 "register_operand" "w")
-		   (match_operand:VDQ_BHSI 2 "register_operand" "w"]
-  "TARGET_SIMD"
-  "sabd\t%0., %1., %2."
+	(minus:VDQ_BHSI
+	  (USMAX:VDQ_BHSI
+	(match_operand:VDQ_BHSI 1 "register_operand" "w")
+	(match_operand:VDQ_BHSI 2 "register_operand" "w"))
+	  (match_operator 3 "aarch64_"
+	[(match_dup 1)
+	 (match_dup 2)])))]
+  "TARGET_SIMD"
+  "abd\t%0., %1., %2."
   [(set_attr "type" "neon_abd")]
 )
 
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index e9ede2934a43539279ae68049fb66b84114d90ea..16e4dbda73ab928054590c47a4398408162c0332 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1056,6 +1056,9 @@ (define_mode_attr f16quad [(V2SF "") (V4SF "q")])
 
 (define_code_attr f16mac [(plus "a") (minus "s")])
 
+;; Map smax to smin and umax to umin.
+(define_code_attr max_opp [(smax "smin") (umax "umin")])
+
 ;; The number of subvectors in an SVE_STRUCT.
 (define_mode_attr vector_count [(VNx32QI "2") (VNx16HI "2")
 (VNx8SI  "2") (VNx4DI  "2")
@@ -1204,6 +1207,9 @@ (define_code_iterator MAXMIN [smax smin umax umin])
 
 (define_code_iterator FMAXMIN [smax smin])
 
+;; Signed and unsigned max operations.
+(define_code_iterator USMAX [smax umax])
+
 ;; Code iterator for variants of vector max and min.
 (define_code_iterator ADDSUB [plus minus])
 
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 855cf7b52f840a81e0bbd2b31fd59fc860060626..b8e6d232ff6237a58addda1ec0e953a1054dc616 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -319,6 +319,12 @@ (define_predicate "aarch64_reg_or_imm"
   (ior (match_operand 0 "register_operand")
(match_operand 0 "const_scalar_int_operand")))
 
+(define_predicate "aarch64_smin"
+  (match_code "smin"))
+
+(define_predicate "aarch64_umin"
+  (match_code "umin"))
+
 ;; True for integer

Re: [REVISED PATCH 7/9]: C++ P0482R5 char8_t: New standard library tests

2019-02-07 Thread Jonathan Wakely

On 23/12/18 21:27 -0500, Tom Honermann wrote:
Attached is a revised patch that addresses changes in P0482R6.  
Changes from the prior patch include:

- Updated the value of the __cpp_char8_t feature test macro to 201811.

Tested on x86_64-linux.


There are quite a few additional changes needed to make the testsuite
pass cleanly with non-default options, e.g. when running it with
RUNTESTFLAGS=--target_board=unix/-fchar8_t/-fno-inline I see these
failures:

FAIL: 21_strings/basic_string/literals/types.cc (test for excess errors)
FAIL: 21_strings/basic_string/literals/values.cc (test for excess errors)
UNRESOLVED: 21_strings/basic_string/literals/values.cc compilation failed to 
produce executable
FAIL: 21_strings/basic_string_view/literals/types.cc (test for excess errors)
FAIL: 21_strings/basic_string_view/literals/values.cc (test for excess errors)
UNRESOLVED: 21_strings/basic_string_view/literals/values.cc compilation failed 
to produce executable
FAIL: 22_locale/codecvt/char16_t.cc (test for excess errors)
UNRESOLVED: 22_locale/codecvt/char16_t.cc compilation failed to produce 
executable
FAIL: 22_locale/codecvt/char32_t.cc (test for excess errors)
UNRESOLVED: 22_locale/codecvt/char32_t.cc compilation failed to produce 
executable
FAIL: 22_locale/codecvt/codecvt_utf8/79980.cc (test for excess errors)
UNRESOLVED: 22_locale/codecvt/codecvt_utf8/79980.cc compilation failed to 
produce executable
FAIL: 22_locale/codecvt/codecvt_utf8/wchar_t/1.cc (test for excess errors)
UNRESOLVED: 22_locale/codecvt/codecvt_utf8/wchar_t/1.cc compilation failed to 
produce executable
FAIL: 22_locale/codecvt/utf8.cc (test for excess errors)
UNRESOLVED: 22_locale/codecvt/utf8.cc compilation failed to produce executable
FAIL: 22_locale/conversions/string/2.cc (test for excess errors)
UNRESOLVED: 22_locale/conversions/string/2.cc compilation failed to produce 
executable
FAIL: 22_locale/conversions/string/3.cc (test for excess errors)
UNRESOLVED: 22_locale/conversions/string/3.cc compilation failed to produce 
executable
FAIL: experimental/string_view/literals/types.cc (test for excess errors)
FAIL: experimental/string_view/literals/values.cc (test for excess errors)
UNRESOLVED: experimental/string_view/literals/values.cc compilation failed to 
produce executable

There would be similar errors running all the tests with -std=c++2a,
which is definitely something I do often and so want the tests to be
clean. We can either disable those tests when char8_t is enabled
(because we already have alternative tests checking the char8_t
versions of string_view etc.) or make them work either way, which the
attached patch begins doing (more changes are needed).

I expect a different set of failures for -fno-char8_t (which is
probably a less important case to support that enabling char8_t in
older standards, but maybe still worth testing now and then).

commit fb345c6806bd4f1efdb5937e083076e79f2086dd
Author: Jonathan Wakely 
Date:   Thu Feb 7 09:04:11 2019 +

fixup tests

diff --git a/libstdc++-v3/testsuite/22_locale/conversions/string/2.cc b/libstdc++-v3/testsuite/22_locale/conversions/string/2.cc
index e83cf98e4c7..2e97b4f3ab0 100644
--- a/libstdc++-v3/testsuite/22_locale/conversions/string/2.cc
+++ b/libstdc++-v3/testsuite/22_locale/conversions/string/2.cc
@@ -40,14 +40,14 @@ void test01()
   typedef str_conv sc;
 
   const sc::byte_string berr = "invalid wide string";
-  const sc::wide_string werr = u8"invalid byte string";
+  const sc::wide_string werr = "invalid byte string";
 
   sc c(berr, werr);
   string input = "Stop";
   input += char(0xFF);
   string woutput = c.from_bytes(input);
   VERIFY( input == woutput ); // noconv case doesn't detect invalid input
-  string winput = u8"Stop";
+  string winput = "Stop";
   winput += char(0xFF);
   string output = c.to_bytes(winput);
   VERIFY( winput == output ); // noconv case doesn't detect invalid input
diff --git a/libstdc++-v3/testsuite/29_atomics/headers/atomic/macros.cc b/libstdc++-v3/testsuite/29_atomics/headers/atomic/macros.cc
index 144c8582a41..ac03f362411 100644
--- a/libstdc++-v3/testsuite/29_atomics/headers/atomic/macros.cc
+++ b/libstdc++-v3/testsuite/29_atomics/headers/atomic/macros.cc
@@ -31,9 +31,18 @@
 # error "ATOMIC_CHAR_LOCK_FREE must be 1 or 2"
 #endif
 
+#ifdef _GLIBCXX_USE_CHAR8_T
+# ifndef ATOMIC_CHAR8_T_LOCK_FREE
+#  error "ATOMIC_CHAR8_T_LOCK_FREE must be a macro"
+# elif ATOMIC_CHAR8_T_LOCK_FREE != 1 && ATOMIC_CHAR8_T_LOCK_FREE != 2
+#  error "ATOMIC_CHAR8_T_LOCK_FREE must be 1 or 2"
+# endif
+#endif
+
 #ifndef ATOMIC_CHAR16_T_LOCK_FREE 
 # error "ATOMIC_CHAR16_T_LOCK_FREE must be a macro"
 #elif ATOMIC_CHAR16_T_LOCK_FREE != 1 && ATOMIC_CHAR16_T_LOCK_FREE != 2
+# error "ATOMIC_CHAR16_T_LOCK_FREE must be 1 or 2"
 #endif
 
 #ifndef ATOMIC_CHAR32_T_LOCK_FREE 
diff --git a/libstdc++-v3/testsuite/29_atomics/headers/atomic/types_std_c++0x.cc b/libstdc++-v3/testsuite/29_atomics/headers/atomic/types_std_c++0x.cc
index c53bb24fa49..47084c4a33a 100644
--- a/libst

[PATCH] S/390: Introduce jdd constraint

2019-02-07 Thread Ilya Leoshkevich
Bootstrapped and regtested on s390x-redhat-linux.

Implementation of section anchors in S/390 back-end added in r266741
broke jump labels in S/390 Linux kernel [1].  Currently jump labels
pass global variable addresses to .quad directive in inline assembly
using "X" constraint.  In the past this used to produce regular symbol
references, however, after r266741 we sometimes get values like
(plus (reg) (const_int)), where (reg) points to a section anchor.
Strictly speaking, this is still correct, since "X" accepts anything.
Thus, now we need another way to support jump labels.

The existing "i" constraint cannot be used, since with -fPIC it must
not accept non-local symbols, however, jump labels do require that,
e.g. __tracepoint_xdp_exception from kernel proper might be referenced
from kernel modules.

The existing "ZL" constraint cannot be used for the same reason.

The existing "b" constraint cannot be used because of the way
expand_asm_stmt works.  It deduces whether the constraint allows
regs, subregs or mems, and processes asm operands differently based on
that.  "b" is supposed to accept values like (mem (symbol_ref)), and
there appears to be no way to explain to expand_asm_stmt that for "b"
mem's address must not be in a register.

This patch introduces the new machine-specific constraint named "jdd" -
"j" prefix is already used for constants, and "d" stands for "data".
It accepts anything that fits into the data section, whether or not
this might require a relocation, that is, anything that passes
CONSTANT_P check.

[1] https://lkml.org/lkml/2019/1/23/346

gcc/ChangeLog:

2019-02-06  Ilya Leoshkevich  

* config/s390/constraints.md (jdd): New constraint.

gcc/testsuite/ChangeLog:

2019-02-06  Ilya Leoshkevich  

* gcc.target/s390/jump-label.c: New test.
---
 gcc/config/s390/constraints.md | 17 +
 gcc/testsuite/gcc.target/s390/jump-label.c | 19 +++
 2 files changed, 36 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/s390/jump-label.c

diff --git a/gcc/config/s390/constraints.md b/gcc/config/s390/constraints.md
index 688dd96e0e2..4055cbc7c68 100644
--- a/gcc/config/s390/constraints.md
+++ b/gcc/config/s390/constraints.md
@@ -37,6 +37,7 @@
 ;; jKK: constant vector with all elements having the same value and
 ;;  matching K constraint
 ;; jm6: An integer operand with the lowest order 6 bits all ones.
+;; jdd: A constant operand that fits into the data section.
 ;;t -- Access registers 36 and 37.
 ;;v -- Vector registers v0-v31.
 ;;C -- A signed 8-bit constant (-128..127)
@@ -567,3 +568,19 @@
 (define_constraint "ZL"
   "LARL operand when in 64-bit mode, otherwise nothing."
   (match_test "TARGET_64BIT && larl_operand (op, VOIDmode)"))
+
+;; This constraint must behave like "i", in particular, the matching values
+;; must never be placed into registers or memory by
+;; cfgexpand.c:expand_asm_stmt.  It could be straightforward to start its name
+;; with a letter from genpreds.c:const_int_constraints, however it would
+;; require using (match_code "const_int"), which is infeasible.  To achieve the
+;; same effect, that is, setting maybe_allows_reg and maybe_allows_mem to false
+;; in genpreds.c:add_constraint, we explicitly exclude reg, subreg and mem
+;; codes.
+(define_constraint "jdd"
+  "A constant operand that fits into the data section.
+   Usage of this constraint might produce a relocation."
+  (and (not (match_code "reg"))
+   (not (match_code "subreg"))
+   (not (match_code "mem"))
+   (match_test "CONSTANT_P (op)")))
diff --git a/gcc/testsuite/gcc.target/s390/jump-label.c 
b/gcc/testsuite/gcc.target/s390/jump-label.c
new file mode 100644
index 000..3de73f6bb6c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/jump-label.c
@@ -0,0 +1,19 @@
+/* Test jdd constraint, which is used for linux kernel jump labels.  */
+
+/* { dg-do link } */
+/* { dg-options "-O2 -fPIC -shared" } */
+
+__attribute__ ((visibility ("default"))) extern int i;
+
+void f (void)
+{
+  asm goto (".pushsection foo\n"
+#if defined(__s390x__)
+".quad %0-.\n"
+#else
+".long %0-.\n"
+#endif
+".popsection\n"
+: : "jdd" (&i) : : l);
+l:;
+}
-- 
2.20.1



Re: [PATCH][wwwdocs][Arm][AArch64] Update changes with new features and flags.

2019-02-07 Thread Gerald Pfeifer
On Wed, 6 Feb 2019, Tamar Christina wrote:
> I've updated the patch with your suggested changes and have grouped
> the Arm and AArch64 targets a bit.

Thanks, Tamar!

> Ok for commit?

Yes, I had meant to imply this in my original review. :-)

Only note is that this seems to hop from  to , skipping ?

Gerald


RE: [PATCH][wwwdocs][Arm][AArch64] Update changes with new features and flags.

2019-02-07 Thread Tamar Christina
Hi Gerard,

> 
> On Wed, 6 Feb 2019, Tamar Christina wrote:
> > I've updated the patch with your suggested changes and have grouped
> > the Arm and AArch64 targets a bit.
> 
> Thanks, Tamar!
> 
> > Ok for commit?
> 
> Yes, I had meant to imply this in my original review. :-)
> 
> Only note is that this seems to hop from  to , skipping ?

Yeah, the h4 heading was still a bit big, so visually it didn't look quite like 
a "child" entry. So I skipped on level.

Cheers,
Tamar
> 
> Gerald


RE: [PATCH][GCC][AArch64] Have empty HWCAPs string ignored during native feature detection

2019-02-07 Thread Tamar Christina
Hi All,

Since this hasn't been reviewed yet anyway I've updated this patch to also fix 
the memory leaks etc.

--

This patch makes the feature detection code for AArch64 GCC not add features
automatically when the feature had no hwcaps string to match against.

This means that -mcpu=native no longer adds feature flags such as +profile.
The behavior wasn't noticed before because at the time +profile was added a bug
was preventing any feature bits from being added by native detections.

The loop has also been changed as Jakub specified in order to avoid a memory
leak that was present in the existing code and to be slightly more efficient.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for trunk?

Thanks,
Tamar

gcc/ChangeLog:

2019-02-07  Tamar Christina  

PR target/88530
* config/aarch64/aarch64-option-extensions.def: Document it.
* config/aarch64/driver-aarch64.c (host_detect_local_cpu): Skip feature
if empty hwcaps.

gcc/testsuite/ChangeLog:

2019-02-07  Tamar Christina  

PR target/88530
* gcc.target/aarch64/options_set_10.c: New test.

> -Original Message-
> From: gcc-patches-ow...@gcc.gnu.org 
> On Behalf Of Tamar Christina
> Sent: Wednesday, January 30, 2019 14:48
> To: Jakub Jelinek 
> Cc: Kyrill Tkachov ; gcc-patches@gcc.gnu.org;
> nd ; James Greenhalgh ;
> Richard Earnshaw ; Marcus Shawcroft
> 
> Subject: Re: [PATCH][GCC][AArch64] Have empty HWCAPs string ignored
> during native feature detection
> 
> Hi Jakub,
> 
> On Wed, Jan 30, 2019 at 02:06:01PM +, Tamar Christina wrote:
> > > Thanks for the feedback, but I think those are changes for another patch.
> >
> > At least the memory leak is something that should be fixed even in
> > stage4 IMNSHO.
> 
> I'll provide a separate patch for this then.
> 
> > Anyway, will defer to aarch64 maintainers here.
> 
> > Just one question, for the *feat_string == '\0' case, is continue what
> > you want, rather than just enabled = false; and doing the
>  > extension_flags &= ~(aarch64_extensions[i].flag);
> > later on?
> 
> Yeah, because the feature may be on by default due to another extension, in
> which case you would erroneously turn it off. The absence of an HWCAPS
> shouldn't pro-actively disable an extension.
> 
> Regards,
> Tamar
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index 50909debf5455d57b86db91a0a5539fed1d5b91e..a6b3ae2b73f285e62e0f24c92bc5efefc9ffb59c 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -43,7 +43,8 @@
the extension (for example, the 'crypto' extension depends on four
entries: aes, pmull, sha1, sha2 being present).  In that case this field
should contain a space (" ") separated list of the strings in 'Features'
-   that are required.  Their order is not important.  */
+   that are required.  Their order is not important.  An empty string means
+   do not detect this feature during auto detection.  */
 
 /* Enabling "fp" just enables "fp".
Disabling "fp" also disables "simd", "crypto", "fp16", "aes", "sha2",
diff --git a/gcc/config/aarch64/driver-aarch64.c b/gcc/config/aarch64/driver-aarch64.c
index 05f1360d48583473820c8008cc09ed139bddc0ce..9515061ec985cc374c7c510c6954a6a3c240814f 100644
--- a/gcc/config/aarch64/driver-aarch64.c
+++ b/gcc/config/aarch64/driver-aarch64.c
@@ -250,27 +250,35 @@ host_detect_local_cpu (int argc, const char **argv)
 	{
 	  for (i = 0; i < num_exts; i++)
 	{
-	  char *p = NULL;
-	  char *feat_string
-		= concat (aarch64_extensions[i].feat_string, NULL);
+	  const char *p = aarch64_extensions[i].feat_string;
+
+	  /* If the feature contains no HWCAPS string then ignore it for the
+		 auto detection.  */
+	  if (*p == '\0')
+		continue;
+
 	  bool enabled = true;
 
 	  /* This may be a multi-token feature string.  We need
-		 to match all parts, which could be in any order.
-		 If this isn't a multi-token feature string, strtok is
-		 just going to return a pointer to feat_string.  */
-	  p = strtok (feat_string, " ");
-	  while (p != NULL)
+		 to match all parts, which could be in any order.  */
+	  size_t len = strlen (buf);
+	  do
 		{
-		  if (strstr (buf, p) == NULL)
+		  const char *end = strchr (p, ' ');
+		  if (end == NULL)
+		end = strchr (p, '\0');
+		  if (memmem (buf, len, p, end - p) == NULL)
 		{
 		  /* Failed to match this token.  Turn off the
 			 features we'd otherwise enable.  */
 		  enabled = false;
 		  break;
 		}
-		  p = strtok (NULL, " ");
+		  if (*end == '\0')
+		break;
+		  p = end + 1;
 		}
+	  while (1);
 
 	  if (enabled)
 		extension_flags |= aarch64_extensions[i].flag;
@@ -360,12 +368,12 @@ host_detect_local_cpu (int argc, const char **argv)
 not_found:
   {
/* If detection fails we ignore the option.
-  Clean up and return empty string.  */
+  

[committed][PATCH][GCC][AArch64] Fix initializer for array so it's a C initializer instead of C++

2019-02-07 Thread Tamar Christina
Hi All,

This fixes a missing = that would cause the array initializer to be a C++
initializer instead of a C one, causing a warning when building with pre-C++11
standards compiler.

Committed under the GCC obvious rules.

gcc/ChangeLog:

2019-02-07  Tamar Christina  

* config/aarch64/aarch64-builtins.c (aarch64_fcmla_lane_builtin_data):
Make it a C initializer.

-- 
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index c8f5a555f6724433dc6cea1cff3547c0c66c54a7..d7b1b7bd6867a0f98a2f67fec0fd80a0a08f69c1 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -455,7 +455,7 @@ static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = {
 
 /* This structure contains how to manage the mapping form the builtin to the
instruction to generate in the backend and how to invoke the instruction.  */
-static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] {
+static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] = {
   AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
 };
 



Re: [rs6000] 64-bit integer loads/stores and FP instructions

2019-02-07 Thread Segher Boessenkool
Hi!

On Wed, Feb 06, 2019 at 11:08:44PM +0100, Eric Botcazou wrote:
> as reported e.g. at https://gcc.gnu.org/ml/gcc-help/2018-11/msg00038.html, 
> the 
> 7 series of compilers started to use FP instructions for simple 64-bit 
> integer 
> loads/stores in unexpected ways.  Consider:



> The difference between GCC 7 and GCC 8 comes from this change:
>   https://gcc.gnu.org/ml/gcc-patches/2018-06/msg00584.html
> which seems to imply that the first change was problematic but which was not 
> backported to the 7 branch.
> 
> So at a minimum I think that the second change should be backported to the 7 
> branch; it applies (almost) cleanly and gives no regressions on PowerPC/Linux.
> 
>   Backport from mainline
>   2018-06-11  Segher Boessenkool  
> 
>   PR target/85755
>   * config/rs6000/rs6000.md (*movdi_internal32): Put constraint modifiers
>   on the correct operand.
>   (*movdi_internal64): Ditto.

Backporting this is okay.  (It was not done because it does not affect
correctness).  What is the "almost", btw?

> But I also think that another part of the first change is problematic, namely 
> the removal of the '*' modifier on the alternatives, which means that the 
> register preference of the instruction isn't skewed towards integer registers 
> any more.

(In https://gcc.gnu.org/ml/gcc-patches/2016-11/txt4j9hLRLCzf.txt and you
presumably mean the *movdi_internal32 hunk, and the first line of it:

- "=Y,r, r, ?m,?*d,?*d,
+ "=Y,r, r, ^m,^d, ^d,

where the last three are mem<-reg, reg<-mem, reg<-reg, for fp regs).

This patch was a year before we switched to LRA, and for non-LRA ports *
does not do any register preferencing.

>  That may be OK for native targets, but that is frowned upon for 
> embedded targets, where people are really unhappy when the compiler emits 
> floating-point instructions for pure integer code for no apparent reason.

For 6xx/7xx people *wanted* to use FP loads and stores whenever possible,
because you get twice the bandwidth with them.

If you do not want the FP registers used (or FP insns used), use
-msoft-float, that's what it's for.

Patches are welcome, but complicating this already very complex code by
introducing an arbitrary distinction between "embedded and other" (or
actually, "server and other") isn't the way to go.  Sorry.

Could you open a PR please?


Segher


Re: [PATCH] Integration of parallel standard algorithms for c++17

2019-02-07 Thread Jonathan Wakely

On 31/01/19 21:08 -0800, Thomas Rodgers wrote:

Update C++17 parallel algorithms to LLVM/MIT licensed upstream sources


Some lines in bits/c++config.h need to be split before 80 columns
(with a backslash if the split is in the middle of a preprocessor
condition obviously).

There are loads of very long lines in the actual PSTL headers too, but
I think we need to keep them similar to the upstream headers to aid
merging, so can't gratuitously reformat them. I'm assuming that
doesn't apply to  since that's our header and any
changes will need to be manually applied anyway, right?

We'll need to add a copy of the LICENSE.TXT file to our sources, since
it's referred to by the comments at the top of each PSTL file.

Typo in include/Makefile.am: ${pstl_srcdir}/memoy_impl.h
(Which will require regenerating include/Makefile.in).

The __cpp_lib_parallel_algorithm macro needs to be (conditionally)
defined in  as well as  and .

The namespaces par_backend and unseq_backend need to be uglified.

ALL the names in the __pstl::internal namespace need to be uglified:
brick_any_of
pattern_any_of
for_each_n_it_serial
brick_walk1
pattern_walk1
etc.

I see quite a few calls to functions that should probably be qualified
to prevent ADL, but that can be fixed later (and upstream first):

  return brick_count(...
  return except_handler([&]() { ...


The DejaGnu directives in the pstl tests need to be in this order:

// { dg-options "-std=gnu++17 -ltbb" }
// { dg-do run { target c++17 } }
// { dg-require-effective-target tbb-backend }

Currently the dg-require-effective-target comes before the dg-do
line, so doesn't work. DejaGnu processes the lines in order. When it
sees the dg-do for target c++17 it overrides the effect of any earlier
dg-require-effective-target.

I fixed that locally like so:
find testsuite/20_util/specialized_algorithms/pstl/ 
testsuite/25_algorithms/pstl/ testsuite/26_numerics/pstl/ -name \*.cc | xargs 
sed -i '/dg-require-eff/{h;d};/dg-do/{p;x}'
And now I don't get FAIL results on a system with no TBB installed
(which is great). I also get no FAIL results on a system with TBB
installed (also great).


There are two copies (with slight differences) of each of the
uninitialized_xxx tests:

libstdc++-v3/testsuite/20_util/specialized_algorithms/pstl/specialized.algorithms/uninitialized_construct.cc
|  128 +++
libstdc++-v3/testsuite/20_util/specialized_algorithms/pstl/specialized.algorithms/uninitialized_copy_move.cc
|  143 
libstdc++-v3/testsuite/20_util/specialized_algorithms/pstl/specialized.algorithms/uninitialized_fill_destroy.cc
 |  103 ++
libstdc++-v3/testsuite/20_util/specialized_algorithms/pstl/uninitialized_construct.cc
   |  132 
libstdc++-v3/testsuite/20_util/specialized_algorithms/pstl/uninitialized_copy_move.cc
   |  154 +
libstdc++-v3/testsuite/20_util/specialized_algorithms/pstl/uninitialized_fill_destroy.cc
|  104 ++

That's all for now, but I'll keep making passes over it, going into
more detail ...




Re: [Patch] [arm] Fix 88714, Arm LDRD/STRD peepholes

2019-02-07 Thread Matthew Malcomson


>>
> 
> Please add the PR marker to the testsuite ChangeLog as well.
> I've been following this PR a bit from the sidelines, I believe a 
> substantial amount of code
> (and one of the testcases) was written by Jakub, so please add him to 
> the ChangeLog entries as well.
> 
> This looks ok to me.
> Thanks for fixing this and thanks Jakub for the analysis and fixes too!
> 
> Kyrill
> 

Thanks Kyrill,
I've committed with the changelog below.


gcc/ChangeLog:

2019-02-07  Matthew Malcomson  
Jakub Jelinek  

PR bootstrap/88714
* config/arm/arm-protos.h (valid_operands_ldrd_strd,
arm_count_ldrdstrd_insns): New declarations.
* config/arm/arm.c (mem_ok_for_ldrd_strd): Remove broken handling of
MINUS.
(valid_operands_ldrd_strd): New function.
(arm_count_ldrdstrd_insns): New function.
* config/arm/ldrdstrd.md: Change peepholes to generate PARALLEL SImode
sets instead of single DImode set and define new insns to match this.


gcc/testsuite/ChangeLog:

2019-02-07  Matthew Malcomson  
Jakub Jelinek  

PR bootstrap/88714
* gcc.c-torture/execute/pr88714.c: New test.
* gcc.dg/rtl/arm/ldrd-peepholes.c: New test.



Backports to 8.x branch

2019-02-07 Thread Jakub Jelinek
Hi!

Another month have passed since my last 8.x backporting effort,
thus I've backported following 32 patches from trunk to 8.x,
bootstrapped/regtested on x86_64-linux and i686-linux and committed.

Jakub
2019-02-07  Jakub Jelinek  

Backported from mainline
2019-01-07  Jakub Jelinek  

PR debug/88723
* dwarf2out.c (const_ok_for_output_1): Remove redundant call to
const_not_ok_for_debug_p target hook.
(mem_loc_descriptor) : Only call const_ok_for_output_1
on UNSPEC and subexpressions thereof if all subexpressions of the
UNSPEC are CONSTANT_P.

2019-01-05  Jakub Jelinek  

PR debug/88635
* dwarf2out.c (const_ok_for_output_1): Reject MINUS that contains
SYMBOL_REF, CODE_LABEL or UNSPEC in subexpressions of second argument.
Reject PLUS that contains SYMBOL_REF, CODE_LABEL or UNSPEC in
subexpressions of both operands.
(mem_loc_descriptor): Handle UNSPEC if target hook acks it and all the
subrtxes are CONSTANT_P.

* gcc.dg/debug/dwarf2/pr88635.c: New test.

--- gcc/dwarf2out.c (revision 267593)
+++ gcc/dwarf2out.c (revision 267594)
@@ -14401,6 +14401,10 @@ expansion_failed (tree expr, rtx rtl, ch
 }
 }
 
+/* True if handling a former CONST by mem_loc_descriptor piecewise.  */
+
+static bool in_const_p;
+
 /* Helper function for const_ok_for_output.  */
 
 static bool
@@ -14423,6 +14427,7 @@ const_ok_for_output_1 (rtx rtl)
 one in a constant pool entry, so testing SYMBOL_REF_TLS_MODEL
 rather than DECL_THREAD_LOCAL_P is not just an optimization.  */
   if (flag_checking
+ && !in_const_p
  && (XVECLEN (rtl, 0) == 0
  || GET_CODE (XVECEXP (rtl, 0, 0)) != SYMBOL_REF
  || SYMBOL_REF_TLS_MODEL (XVECEXP (rtl, 0, 0)) == TLS_MODEL_NONE))
@@ -14446,13 +14451,6 @@ const_ok_for_output_1 (rtx rtl)
   if (CONST_POLY_INT_P (rtl))
 return false;
 
-  if (targetm.const_not_ok_for_debug_p (rtl))
-{
-  expansion_failed (NULL_TREE, rtl,
-   "Expression rejected for debug by the backend.\n");
-  return false;
-}
-
   /* FIXME: Refer to PR60655. It is possible for simplification
  of rtl expressions in var tracking to produce such expressions.
  We should really identify / validate expressions
@@ -14465,6 +14463,41 @@ const_ok_for_output_1 (rtx rtl)
 case NOT:
 case NEG:
   return false;
+case PLUS:
+  {
+   /* Make sure SYMBOL_REFs/UNSPECs are at most in one of the
+  operands.  */
+   subrtx_var_iterator::array_type array;
+   bool first = false;
+   FOR_EACH_SUBRTX_VAR (iter, array, XEXP (rtl, 0), ALL)
+ if (SYMBOL_REF_P (*iter)
+ || LABEL_P (*iter)
+ || GET_CODE (*iter) == UNSPEC)
+   {
+ first = true;
+ break;
+   }
+   if (!first)
+ return true;
+   FOR_EACH_SUBRTX_VAR (iter, array, XEXP (rtl, 1), ALL)
+ if (SYMBOL_REF_P (*iter)
+ || LABEL_P (*iter)
+ || GET_CODE (*iter) == UNSPEC)
+   return false;
+   return true;
+  }
+case MINUS:
+  {
+   /* Disallow negation of SYMBOL_REFs or UNSPECs when they
+  appear in the second operand of MINUS.  */
+   subrtx_var_iterator::array_type array;
+   FOR_EACH_SUBRTX_VAR (iter, array, XEXP (rtl, 1), ALL)
+ if (SYMBOL_REF_P (*iter)
+ || LABEL_P (*iter)
+ || GET_CODE (*iter) == UNSPEC)
+   return false;
+   return true;
+  }
 default:
   return true;
 }
@@ -15608,6 +15641,7 @@ mem_loc_descriptor (rtx rtl, machine_mod
 pool.  */
 case CONST:
 case SYMBOL_REF:
+case UNSPEC:
   if (!is_a  (mode, &int_mode)
  || (GET_MODE_SIZE (int_mode) > DWARF2_ADDR_SIZE
 #ifdef POINTERS_EXTEND_UNSIGNED
@@ -15615,6 +15649,43 @@ mem_loc_descriptor (rtx rtl, machine_mod
 #endif
  ))
break;
+
+  if (GET_CODE (rtl) == UNSPEC)
+   {
+ /* If delegitimize_address couldn't do anything with the UNSPEC, we
+can't express it in the debug info.  This can happen e.g. with some
+TLS UNSPECs.  Allow UNSPECs formerly from CONST that the backend
+approves.  */
+ bool not_ok = false;
+
+ if (!in_const_p)
+   break;
+
+ subrtx_var_iterator::array_type array;
+ FOR_EACH_SUBRTX_VAR (iter, array, rtl, ALL)
+   if (*iter != rtl && !CONSTANT_P (*iter))
+ {
+   not_ok = true;
+   break;
+ }
+
+ if (not_ok)
+   break;
+
+ FOR_EACH_SUBRTX_VAR (iter, array, rtl, ALL)
+   if (!const_ok_for_output_1 (*iter))
+ {
+   not_ok = true;
+   break;
+ }
+
+ if (not_ok)
+   break;
+
+ rtl = gen_rtx_CONST (GET_MODE (rtl), rtl);

Re: [PATCH] correct __clear_cache signature

2019-02-07 Thread Martin Sebor

On 2/7/19 2:46 AM, Segher Boessenkool wrote:

Hi Martin,

On Wed, Feb 06, 2019 at 05:28:08PM -0700, Martin Sebor wrote:

  void
-__clear_cache (char *beg __attribute__((__unused__)),
-  char *end __attribute__((__unused__)))
+__clear_cache (void *beg __attribute__((__unused__)),
+  void *end __attribute__((__unused__)))
  {
  #ifdef CLEAR_INSN_CACHE
-  CLEAR_INSN_CACHE (beg, end);
+  /* Cast the void* pointers to char* as some implementations
+ of the macro assume the pointers can be subtracted from
+ one another.  */
+  CLEAR_INSN_CACHE ((char *) beg, (char *) end);
  #endif /* CLEAR_INSN_CACHE */
  }


You can subtract pointers to void in GCC just fine, it's a GNU C extension.
See extend.texi:

   In GNU C, addition and subtraction operations are supported on pointers to
   @code{void} and on pointers to functions.  This is done by treating the
   size of a @code{void} or of a function as 1.

(and libgcc is built with GCC always).


Sure.  The macro is a no-op on x86_64 where I tested so I didn't want
to take any chances this late in stage 4 and end up breaking someone's
builds.

Martin


[Committed] S/390: Fix the vec_xl / vec_xst style builtins

2019-02-07 Thread Andreas Krebbel
This patch fixes several problems with the vec_xl/vec_xst builtins:

- vec_xl/vec_xst needs to use the alignment of the scalar memory
  operand for the vector type reference. This is required to emit the
  proper vl/vst alignment hints.
- vec_xl / vec_xld2 / vec_xlw4 should accept const pointer source operands
- vec_xlw4 / vec_xstw4 needs to accept float memory operands

Bootstrapped and regression tested on s390x.

gcc/ChangeLog:

2019-02-07  Andreas Krebbel  

* config/s390/s390-builtin-types.def: Add new types.
* config/s390/s390-builtins.def: (s390_vec_xl, s390_vec_xld2)
(s390_vec_xlw4): Make the memory operand into a const pointer.
(s390_vec_xld2, s390_vec_xlw4): Add a variant for single precision
float.
* config/s390/s390-c.c (s390_expand_overloaded_builtin): Generate
a new vector type with the alignment of the scalar memory operand.

gcc/testsuite/ChangeLog:

2019-02-07  Andreas Krebbel  

* gcc.target/s390/zvector/xl-xst-align-1.c: New test.
* gcc.target/s390/zvector/xl-xst-align-2.c: New test.
---
 gcc/config/s390/s390-builtin-types.def | 14 -
 gcc/config/s390/s390-builtins.def  | 65 +++---
 gcc/config/s390/s390-c.c   | 28 +++---
 .../gcc.target/s390/zvector/xl-xst-align-1.c   | 45 +++
 .../gcc.target/s390/zvector/xl-xst-align-2.c   | 48 
 5 files changed, 161 insertions(+), 39 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/zvector/xl-xst-align-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/zvector/xl-xst-align-2.c

diff --git a/gcc/config/s390/s390-builtin-types.def 
b/gcc/config/s390/s390-builtin-types.def
index e749d6d..58d920c 100644
--- a/gcc/config/s390/s390-builtin-types.def
+++ b/gcc/config/s390/s390-builtin-types.def
@@ -260,6 +260,7 @@ DEF_FN_TYPE_2 (BT_FN_V4SF_FLT_INT, BT_V4SF, BT_FLT, BT_INT)
 DEF_FN_TYPE_2 (BT_FN_V4SF_V4SF_UCHAR, BT_V4SF, BT_V4SF, BT_UCHAR)
 DEF_FN_TYPE_2 (BT_FN_V4SF_V4SF_V4SF, BT_V4SF, BT_V4SF, BT_V4SF)
 DEF_FN_TYPE_2 (BT_FN_V4SI_BV4SI_V4SI, BT_V4SI, BT_BV4SI, BT_V4SI)
+DEF_FN_TYPE_2 (BT_FN_V4SI_INT_VOIDCONSTPTR, BT_V4SI, BT_INT, BT_VOIDCONSTPTR)
 DEF_FN_TYPE_2 (BT_FN_V4SI_INT_VOIDPTR, BT_V4SI, BT_INT, BT_VOIDPTR)
 DEF_FN_TYPE_2 (BT_FN_V4SI_UV4SI_UV4SI, BT_V4SI, BT_UV4SI, BT_UV4SI)
 DEF_FN_TYPE_2 (BT_FN_V4SI_V2DI_V2DI, BT_V4SI, BT_V2DI, BT_V2DI)
@@ -492,6 +493,7 @@ DEF_OV_TYPE (BT_OV_USHORT_UV8HI_INT, BT_USHORT, BT_UV8HI, 
BT_INT)
 DEF_OV_TYPE (BT_OV_UV16QI_BV16QI_BV16QI, BT_UV16QI, BT_BV16QI, BT_BV16QI)
 DEF_OV_TYPE (BT_OV_UV16QI_BV16QI_BV16QI_INTPTR, BT_UV16QI, BT_BV16QI, 
BT_BV16QI, BT_INTPTR)
 DEF_OV_TYPE (BT_OV_UV16QI_BV16QI_UV16QI, BT_UV16QI, BT_BV16QI, BT_UV16QI)
+DEF_OV_TYPE (BT_OV_UV16QI_LONG_UCHARCONSTPTR, BT_UV16QI, BT_LONG, 
BT_UCHARCONSTPTR)
 DEF_OV_TYPE (BT_OV_UV16QI_LONG_UCHARPTR, BT_UV16QI, BT_LONG, BT_UCHARPTR)
 DEF_OV_TYPE (BT_OV_UV16QI_UCHAR, BT_UV16QI, BT_UCHAR)
 DEF_OV_TYPE (BT_OV_UV16QI_UCHARCONSTPTR, BT_UV16QI, BT_UCHARCONSTPTR)
@@ -523,6 +525,7 @@ DEF_OV_TYPE (BT_OV_UV16QI_UV8HI_UV8HI_INTPTR, BT_UV16QI, 
BT_UV8HI, BT_UV8HI, BT_
 DEF_OV_TYPE (BT_OV_UV16QI_V16QI, BT_UV16QI, BT_V16QI)
 DEF_OV_TYPE (BT_OV_UV16QI_V8HI_V8HI, BT_UV16QI, BT_V8HI, BT_V8HI)
 DEF_OV_TYPE (BT_OV_UV2DI_BV2DI_UV2DI, BT_UV2DI, BT_BV2DI, BT_UV2DI)
+DEF_OV_TYPE (BT_OV_UV2DI_LONG_ULONGLONGCONSTPTR, BT_UV2DI, BT_LONG, 
BT_ULONGLONGCONSTPTR)
 DEF_OV_TYPE (BT_OV_UV2DI_LONG_ULONGLONGPTR, BT_UV2DI, BT_LONG, BT_ULONGLONGPTR)
 DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONG, BT_UV2DI, BT_ULONGLONG)
 DEF_OV_TYPE (BT_OV_UV2DI_ULONGLONGCONSTPTR, BT_UV2DI, BT_ULONGLONGCONSTPTR)
@@ -556,6 +559,8 @@ DEF_OV_TYPE (BT_OV_UV2DI_V2DI, BT_UV2DI, BT_V2DI)
 DEF_OV_TYPE (BT_OV_UV4SI_BV4SI_BV4SI, BT_UV4SI, BT_BV4SI, BT_BV4SI)
 DEF_OV_TYPE (BT_OV_UV4SI_BV4SI_BV4SI_INTPTR, BT_UV4SI, BT_BV4SI, BT_BV4SI, 
BT_INTPTR)
 DEF_OV_TYPE (BT_OV_UV4SI_BV4SI_UV4SI, BT_UV4SI, BT_BV4SI, BT_UV4SI)
+DEF_OV_TYPE (BT_OV_UV4SI_LONG_FLTPTR, BT_UV4SI, BT_LONG, BT_FLTPTR)
+DEF_OV_TYPE (BT_OV_UV4SI_LONG_UINTCONSTPTR, BT_UV4SI, BT_LONG, BT_UINTCONSTPTR)
 DEF_OV_TYPE (BT_OV_UV4SI_LONG_UINTPTR, BT_UV4SI, BT_LONG, BT_UINTPTR)
 DEF_OV_TYPE (BT_OV_UV4SI_UINT, BT_UV4SI, BT_UINT)
 DEF_OV_TYPE (BT_OV_UV4SI_UINTCONSTPTR, BT_UV4SI, BT_UINTCONSTPTR)
@@ -593,6 +598,7 @@ DEF_OV_TYPE (BT_OV_UV4SI_V4SI, BT_UV4SI, BT_V4SI)
 DEF_OV_TYPE (BT_OV_UV8HI_BV8HI_BV8HI, BT_UV8HI, BT_BV8HI, BT_BV8HI)
 DEF_OV_TYPE (BT_OV_UV8HI_BV8HI_BV8HI_INTPTR, BT_UV8HI, BT_BV8HI, BT_BV8HI, 
BT_INTPTR)
 DEF_OV_TYPE (BT_OV_UV8HI_BV8HI_UV8HI, BT_UV8HI, BT_BV8HI, BT_UV8HI)
+DEF_OV_TYPE (BT_OV_UV8HI_LONG_USHORTCONSTPTR, BT_UV8HI, BT_LONG, 
BT_USHORTCONSTPTR)
 DEF_OV_TYPE (BT_OV_UV8HI_LONG_USHORTPTR, BT_UV8HI, BT_LONG, BT_USHORTPTR)
 DEF_OV_TYPE (BT_OV_UV8HI_USHORT, BT_UV8HI, BT_USHORT)
 DEF_OV_TYPE (BT_OV_UV8HI_USHORTCONSTPTR, BT_UV8HI, BT_USHORTCONSTPTR)
@@ -626,6 +632,7 @@ DEF_OV_TYPE (BT_OV_UV8HI_UV8HI_V8HI, BT_UV8HI, BT_UV8HI, 
BT_V8HI)
 DEF_OV_TYPE (BT_OV_UV8HI_V4SI_V4SI, BT_U

Re: [PATCH] print correct array sizes in errors (PR 87996)

2019-02-07 Thread Jason Merrill

On 2/5/19 4:55 PM, Martin Sebor wrote:

On 2/5/19 12:14 PM, Jason Merrill wrote:

On 2/5/19 1:46 PM, Martin Sebor wrote:

On 2/1/19 7:41 AM, Jason Merrill wrote:

On 1/31/19 5:49 PM, Martin Sebor wrote:

On 1/30/19 3:15 PM, Jason Merrill wrote:

On 1/29/19 7:15 PM, Martin Sebor wrote:

+  /* Try to convert the original SIZE to a ssizetype.  */
+  if (orig_size != error_mark_node
+  && !TYPE_UNSIGNED (TREE_TYPE (orig_size)))
+    {
+  if (TREE_CODE (size) == INTEGER_CST
+  && tree_int_cst_sign_bit (size))
+    diagsize = build_converted_constant_expr (ssizetype, size,
+  tsubst_flags_t ());
+  else if (size == error_mark_node
+   && TREE_CODE (orig_size) == INTEGER_CST
+   && tree_int_cst_sign_bit (orig_size))
+    diagsize = build_converted_constant_expr (ssizetype, 
orig_size,

+  tsubst_flags_t ());
+    }


Using build_converted_constant_expr here looks odd; that's a 
language-level notion, and we're dealing with compiler internals. 
fold_convert seems more appropriate.


Done.




+  if (TREE_CONSTANT (size))
+    {
+  if (!diagsize && TREE_CODE (size) == INTEGER_CST)
+    diagsize = size;
+    }
+  else
 size = osize;
 }

@@ -9732,15 +9758,12 @@ compute_array_index_type_loc (location_t 
name_loc,

   if (TREE_CODE (size) == INTEGER_CST)
 {
   /* An array must have a positive number of elements.  */
-  if (!valid_constant_size_p (size))
+  if (!diagsize)
+    diagsize = size;


It seems like the earlier hunk here is unnecessary; if size is an 
INTEGER_CST, it will be unchanged, and so be used for diagsize in 
the latter hunk without any changes to the earlier location. 
Actually, why not do all of the diagsize logic down here?  There 
doesn't seem to be anything above that relies on information we 
will have lost at this point.


Sure.  Done in the attached revision.



-  tree osize = size;
+  /* The original numeric size as seen in the source code after
+ any substitution and before conversion to size_t.  */
+  tree origsize = NULL_TREE;


Can't you use osize?  instantiate_non_dependent_expr doesn't do any 
actual substitution, it shouldn't change the type of the expression 
or affect whether it's an INTEGER_CST.


I went ahead and reused osize but kept the new name origsize (I assume
avoiding introducing a new variable is what you meant).  The longer
name is more descriptive and also has a comment explaining what it's
for (which is why I didn't touch osize initially -- I didn't know
enough about what it was for or how it might change).


Yep, it was saving the original "size" before conversions.  I guess 
the name change is OK, but please restore the initialization from 
"size", and drop the added comment on the call to mark_rvalue_use 
(which is to possibly replace a lambda capture with its constant value).


How about passing origsize into valid_array_size_p rather than than 
always computing diagsize in the caller?


I'm not sure I understand what you're asking for here.  diagsize
is computed from either size or origsize but valid_array_size_p
takes just one size (or type) argument.  What part do you want
to move to valid_array_size_p (and why)?  Or are you asking to
call fold_convert() in valid_array_size_p instead here?


I was thinking of adding an origsize parameter to valid_array_size_p.

Jason



[Ada] Fix tasking on SPARC/Linux

2019-02-07 Thread Eric Botcazou
The tasking is terminally broken on SPARC/Linux with the mainline compiler.
The problem is also visible on other branches if you compile the runtime with 
assertions enabled.

Tested on SPARC64/Linux, applied on all active branches.


2019-02-07  Eric Botcazou  

* libgnarl/s-linux__sparc.ads (ETIMEDOUT): Set to correct value.

-- 
Eric BotcazouIndex: libgnarl/s-linux__sparc.ads
===
--- libgnarl/s-linux__sparc.ads	(revision 268508)
+++ libgnarl/s-linux__sparc.ads	(working copy)
@@ -70,7 +70,7 @@ package System.Linux is
EINVAL: constant := 22;
ENOMEM: constant := 12;
EPERM : constant := 1;
-   ETIMEDOUT : constant := 110;
+   ETIMEDOUT : constant := 60;
 
-
-- Signals --


Re: [C++PATCH] [PR86379] do not use TREE_TYPE for USING_DECL_SCOPE

2019-02-07 Thread Jason Merrill

On 2/5/19 8:58 PM, Alexandre Oliva wrote:

On Feb  5, 2019, Jason Merrill  wrote:


On Tue, Feb 5, 2019 at 1:37 AM Alexandre Oliva  wrote:

On Jan 31, 2019, Jason Merrill  wrote:


Let's use strip_using_decl instead


Aah, nice!  Thanks, I'll make the changes, test them, and post a new patch.



@@ -13288,7 +13295,8 @@ grok_special_member_properties (tree decl)
{
tree class_type;
-  if (!DECL_NONSTATIC_MEMBER_FUNCTION_P (decl))
+  if (TREE_CODE (decl) != USING_DECL
+  && !DECL_NONSTATIC_MEMBER_FUNCTION_P (decl))
return;



Is there a reason not to use it here, as well?


The using decl will take us to a member of a different class, and this
function takes the DECL_CONTEXT of decl and adjusts the properties of
that class.  If we followed USING_DECLs in decl that early, we'd adjust
(again?) the member properties of USING_DECL_SCOPE(original using decl),
rather than of DECL_CONTEXT (original using decl) as intended.



But a little further in, copy_fn_p will also check
DECL_NONSTATIC_MEMBER_FUNCTION_P and crash.


It would crash if I hadn't adjusted it to test for USING_DECLs, yes.


This function used to do nothing for USING_DECL (since its TREE_TYPE
wasn't METHOD_TYPE), right?  It should continue to do nothing.


I thought I was fixing bugs making certain functions follow USING_DECLs
rather than fail to do what it should, because the test on TREE_TYPE
happened to not pass.  This one I was not so sure about, for the reasons
above.  I guess it makes sense to return early if it really shouldn't do
anything for a USING_DECL, even if it happens to resolve to a method
that it would otherwise handle if declared directly in the class.


Come to think of it, how about fixing DECL_NONSTATIC_MEMBER_FUNCTION_P
to be false for non-functions rather than messing with all these
places that use it?


I considered that at first, but it seemed to me that this would not
bring about the desired behavior in the first functions I touched
(lookup_member vs shared_member), and I thought we'd be better off
retaining some means to catch incorrect uses of this and other macros on
USING_DECLs, so that we can analyze and fix the uses instead of getting
accidental behavior, correct or not.


OK, that makes sense; it isn't always clear what the right handling of a 
USING_DECL is.  In grok_special_member_properties we want to ignore them 
(as you do) because the handling there only applies to immediate 
members, and any inherited properties will be handled when processing 
the bases.


In protected_accessible_p and shared_member_p, if we're left with a 
USING_DECL after strip_using_decl, we can't give a meaningful answer, 
and should probably abort; we shouldn't get here with a dependent 
expression.


The other two changes look fine.

Jason


Xfail gfortran.dg/pr79966.f90

2019-02-07 Thread Jan Hubicka
Hi,
I have added the testcase for PR79966 to test that tp_sum is inlined
becuase it matters for performance of that benchmark. Sadly it is not
inlined anymore because it was inlined due to accounting bug I fixed in
January.

I do not see how to make inliner to do the inlining here except for
setting --param inline-min-speedup=2 which is bit extreme (though i
tested estting it to 5 and it does not add too much of code size).

Inliner does not see why inlining the function is profitable and
previously it did so because of missing multiplication scaling
statement time by iteration count. tp_sum has loop iterating 16 times
which was accounted as loop iterating once making whole funtion chepaer
and making inlining seem more profitable.

OK?

Honza

PR ipa/88711
* gfortran.dg/pr79966.f90: Xfail for time being.
Index: ../../gcc/testsuite/gfortran.dg/pr79966.f90
===
--- ../../gcc/testsuite/gfortran.dg/pr79966.f90 (revision 268579)
+++ ../../gcc/testsuite/gfortran.dg/pr79966.f90 (working copy)
@@ -108,5 +108,5 @@ contains
 
   call RunTPTests()
   end program
-
-! { dg-final { scan-ipa-dump "Inlined tp_sum/\[0-9\]+ into 
runtptests/\[0-9\]+" "inline" } }
+! See PR88711. Inliner is currently not able to figure out that inlining 
tp_sum is a good idea.
+! { dg-final { scan-ipa-dump "Inlined tp_sum/\[0-9\]+ into 
runtptests/\[0-9\]+" "inline" { xfail spu-*-* } } }


Re: Xfail gfortran.dg/pr79966.f90

2019-02-07 Thread Rainer Orth
Hi Honza,

>   PR ipa/88711
>   * gfortran.dg/pr79966.f90: Xfail for time being.
> Index: ../../gcc/testsuite/gfortran.dg/pr79966.f90
> ===
> --- ../../gcc/testsuite/gfortran.dg/pr79966.f90   (revision 268579)
> +++ ../../gcc/testsuite/gfortran.dg/pr79966.f90   (working copy)
> @@ -108,5 +108,5 @@ contains
>  
>call RunTPTests()
>end program
> -
> -! { dg-final { scan-ipa-dump "Inlined tp_sum/\[0-9\]+ into 
> runtptests/\[0-9\]+" "inline" } }
> +! See PR88711. Inliner is currently not able to figure out that inlining 
> tp_sum is a good idea.
> +! { dg-final { scan-ipa-dump "Inlined tp_sum/\[0-9\]+ into 
> runtptests/\[0-9\]+" "inline" { xfail spu-*-* } } }

why restrict the xfail to spu only?  According to gcc-testresults, the
test currently fails everywhere (aarch64, arm*, hppa*, i?86, ia64, m68k,
powerpc*, riscv64, sparc*, x86_64).

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [C++PATCH] [PR87322] move cp_evaluated up to tsubst all lambda parms

2019-02-07 Thread Jason Merrill

On 2/7/19 2:24 AM, Alexandre Oliva wrote:


From: Alexandre Oliva 

A lambda capture variable initialized with a lambda expr taking more
than one parameter got us confused.

The first problem was that the parameter list was cut short during
tsubsting because we tsubsted it with cp_unevaluated_operand.  We
reset it right after, to tsubst the function body, so I've moved the
reset up so that it's in effect while processing the parameters as
well.

The second problem was that the lambda expr appeared twice, once in a
decltype that gave the capture variable its type, and once in its
initializer.  This caused us to instantiate two separate lambda exprs
and closure types, and then to flag that the lambda expr in the
initializer could not be converted to the unrelated closure type
determined for the capture variable.  Recording the tsubsted expr in
the local specialization map, and retrieving it for reuse fixed it.

Regstrapped on x86_64- and i686-linux-gnu.  Ok to install?


for  gcc/cp/ChangeLog

PR c++/87322
* pt.c (tsubst_lambda_expr): Avoid duplicate tsubsting.
Move cp_evaluated resetting before signature tsubsting.

for  gcc/testsuite/ChangeLog

PR c++/87322
* g++.dg/cpp1y/pr87322.C: New.
---
  gcc/cp/pt.c  |   18 ++
  gcc/testsuite/g++.dg/cpp1y/pr87322.C |   23 +++
  2 files changed, 37 insertions(+), 4 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp1y/pr87322.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index b8fbf4046f07..3dffa8d2de88 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -17912,8 +17912,17 @@ tsubst_lambda_expr (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
tree oldfn = lambda_function (t);
in_decl = oldfn;
  
+  /* If we have already specialized this lambda expr, reuse it.  See

+ PR c++/86322.  */


Wrong PR number.


+  if (local_specializations)
+if (tree r = retrieve_local_specialization (t))
+  return r;


Hmm, I would expect this to do the wrong thing for pack expansion of a 
lambda, giving us only one rather than one per element of the expansion. 
 For instance, in lambda-variadic5.C we should get three instantiations 
of the "print" function template; can you add that check to the test?


The cp_evaluated change is OK.

Jason


Re: [PATCH] i386: Fix typo in *movoi_internal_avx/movti_internal

2019-02-07 Thread Uros Bizjak
On Thu, Feb 7, 2019 at 5:44 AM H.J. Lu  wrote:
>
> PR target/89229
> * config/i386/i386.md (*movoi_internal_avx): Set mode to OI
> for TARGET_AVX512VL.
> (*movti_internal): Set mode to TI for TARGET_AVX512VL.

OK.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386.md | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 9948f77fca5..c1492363bca 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -1938,7 +1938,7 @@
>  (const_string "XI")
>(and (eq_attr "alternative" "1")
> (match_test "TARGET_AVX512VL"))
> -(const_string "XI")
> +(const_string "OI")
>(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
> (and (eq_attr "alternative" "3")
>  (match_test "TARGET_SSE_TYPELESS_STORES")))
> @@ -2017,7 +2017,7 @@
>  (const_string "XI")
>(and (eq_attr "alternative" "3")
> (match_test "TARGET_AVX512VL"))
> -(const_string "XI")
> +(const_string "TI")
>(ior (not (match_test "TARGET_SSE2"))
> (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
>  (and (eq_attr "alternative" "5")
> --
> 2.20.1
>


[PING] [PATCH, RFC] Avoid the -D option which is not available install-sh

2019-02-07 Thread Bernd Edlinger
I'd like to ping for this patch:
https://gcc.gnu.org/ml/gcc-patches/2019-01/msg01779.html

Thanks
Bernd.

On 1/31/19 12:38 PM, Bernd Edlinger wrote:
> Hi,
> 
> I have an issue with the installation of gcc when configured with 
> --enable-languages=all
> on an arm-target where install-sh is used, and make install fails at 
> libphobos as follows:
> 
>   if test -f $file; then \
> /home/ed/gnu/gcc-9-20190127-0/install-sh -c -m 644 -D $file 
> /home/ed/gnu/arm-linux-gnueabihf/lib/gcc/armv7l-unknown-linux-gnueabihf/9.0.1/include/d/$file
>  ; \
>   else \
> /home/ed/gnu/gcc-9-20190127-0/install-sh -c -m 644 -D 
> ../../../../gcc-9-20190127-0/libphobos/libdruntime/$file \
>   
> /home/ed/gnu/arm-linux-gnueabihf/lib/gcc/armv7l-unknown-linux-gnueabihf/9.0.1/include/d/$file
>  ; \
>   fi ; \
> done
> /home/ed/gnu/gcc-9-20190127-0/install-sh: invalid option: -D
> /home/ed/gnu/gcc-9-20190127-0/install-sh: invalid option: -D
> /home/ed/gnu/gcc-9-20190127-0/install-sh: invalid option: -D
> ...
> 
> I have fixed the installation with the attached patch, but when I regenerate 
> the automake
> files using automake-1.15.1 and autoconf-2.69, I have an issue that apparently
> the configure.ac must be out of sync, and the the generated files are missing
> the option --runstatedir no matter what I do.  At least on the source 
> 
> RFC, because I am not sure what the --runstatedir option is, and if it is 
> intentional to remove,
> and forgotten to re-generate, or if was intended to add, and forgotten to 
> check in the
> configure.ac.
> 
> Attached patch which fixes the install issue, and removes the --runstatedir 
> configure option.
> Is OK as is, or has anybody an idea how to fix this mess?
> 
> 
> Thanks
> Bernd.
> 


[committed] [obvious][testsuite] Only run rtl/arm/ldrd-peepholes.c test on arm architecture

2019-02-07 Thread Matthew Malcomson
My previous patch failed to only run an arm test on arm architecture.
This adds that condition to the test.

Committed to trunk as obvious.

gcc/testsuite/ChangeLog:

2019-02-07  Matthew Malcomson  

* gcc.dg/rtl/arm/ldrd-peepholes.c: Only run on arm



### Attachment also inlined for ease of reply###


diff --git a/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c 
b/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c
index 
ff209c5df29765441bbe9481ac8caf7bbc6af8f7..4c3949c0963b8482545df670c31db2d9ec0f26b3
 100644
--- a/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c
+++ b/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c
@@ -1,4 +1,4 @@
-/* { dg-do compile } */
+/* { dg-do compile { target arm*-*-* } } */
 /* { dg-skip-if "Ensure only targetting arm with TARGET_LDRD" { *-*-* } { 
"-mthumb" } { "" } } */
 /* { dg-options "-O3 -marm -fdump-rtl-peephole2" } */
 

diff --git a/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c 
b/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c
index 
ff209c5df29765441bbe9481ac8caf7bbc6af8f7..4c3949c0963b8482545df670c31db2d9ec0f26b3
 100644
--- a/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c
+++ b/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c
@@ -1,4 +1,4 @@
-/* { dg-do compile } */
+/* { dg-do compile { target arm*-*-* } } */
 /* { dg-skip-if "Ensure only targetting arm with TARGET_LDRD" { *-*-* } { 
"-mthumb" } { "" } } */
 /* { dg-options "-O3 -marm -fdump-rtl-peephole2" } */
 



[PATCH] i386: Add standard scalar operation patterns

2019-02-07 Thread H.J. Lu
Standard scalar operation patterns which preserve the rest of the vector
look like

 (vec_merge:V2DF
   (vec_duplicate:V2DF
 (op:DF (vec_select:DF (reg/v:V2DF 85 [ x ])
(parallel [ (const_int 0 [0])]))
 (reg:DF 87))
   (reg/v:V2DF 85 [ x ])
   (const_int 1 [0x1])]))

Add such pattens to i386 backend and convert VEC_CONCAT patterns to
standard standard scalar operation patterns.

gcc/

PR target/54855
* simplify-rtx.c (simplify_binary_operation_1): Convert
VEC_CONCAT patterns to standard standard scalar operation
patterns.
* config/i386/sse.md (*_vm3): New.
(*_vm3): Likewise.

gcc/testsuite/

PR target/54855
* gcc.target/i386/pr54855-1.c: New test.
* gcc.target/i386/pr54855-2.c: Likewise.
* gcc.target/i386/pr54855-3.c: Likewise.
* gcc.target/i386/pr54855-4.c: Likewise.
* gcc.target/i386/pr54855-5.c: Likewise.
* gcc.target/i386/pr54855-6.c: Likewise.
* gcc.target/i386/pr54855-7.c: Likewise.
---
 gcc/config/i386/sse.md| 45 +
 gcc/simplify-rtx.c| 49 +++
 gcc/testsuite/gcc.target/i386/pr54855-1.c | 16 
 gcc/testsuite/gcc.target/i386/pr54855-2.c | 15 +++
 gcc/testsuite/gcc.target/i386/pr54855-3.c | 14 +++
 gcc/testsuite/gcc.target/i386/pr54855-4.c | 14 +++
 gcc/testsuite/gcc.target/i386/pr54855-5.c | 16 
 gcc/testsuite/gcc.target/i386/pr54855-6.c | 14 +++
 gcc/testsuite/gcc.target/i386/pr54855-7.c | 14 +++
 9 files changed, 197 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr54855-7.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5dc0930ac1f..03b6f3369fc 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1719,6 +1719,28 @@
(set_attr "type" "sseadd")
(set_attr "mode" "")])
 
+;; Standard scalar operation patterns which preserve the rest of the
+;; vector for combiner.
+(define_insn "*_vm3"
+  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
+   (vec_merge:VF_128
+ (vec_duplicate:VF_128
+   (plusminus:
+ (vec_select:
+   (match_operand:VF_128 1 "register_operand" "0,v")
+   (parallel [(const_int 0)]))
+ (match_operand: 2 "nonimmediate_operand" "xm,vm")))
+ (match_dup 1)
+ (const_int 1)))]
+  "TARGET_SSE"
+  "@
+   \t{%2, %0|%0, %2}
+   v\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sseadd")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "")])
+
 (define_insn 
"_vm3"
   [(set (match_operand:VF_128 0 "register_operand" "=x,v")
(vec_merge:VF_128
@@ -1773,6 +1795,29 @@
(set_attr "type" "ssemul")
(set_attr "mode" "")])
 
+;; Standard scalar operation patterns which preserve the rest of the
+;; vector for combiner.
+(define_insn "*_vm3"
+  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
+   (vec_merge:VF_128
+ (vec_duplicate:VF_128
+   (multdiv:
+ (vec_select:
+   (match_operand:VF_128 1 "register_operand" "0,v")
+   (parallel [(const_int 0)]))
+ (match_operand: 2 "nonimmediate_operand" "xm,vm")))
+ (match_dup 1)
+ (const_int 1)))]
+  "TARGET_SSE"
+  "@
+   \t{%2, %0|%0, %2}
+   v\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sse")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "btver2_decode" "direct,double")
+   (set_attr "mode" "")])
+
 (define_insn 
"_vm3"
   [(set (match_operand:VF_128 0 "register_operand" "=x,v")
(vec_merge:VF_128
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 83580a259f3..c32544381d0 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -4023,6 +4023,55 @@ simplify_binary_operation_1 (enum rtx_code code, 
machine_mode mode,
return simplify_gen_binary (VEC_SELECT, mode, XEXP (trueop0, 0),
gen_rtx_PARALLEL (VOIDmode, vec));
  }
+
+   /* Turn
+
+  (vec_concat:V2DF
+(op:DF (vec_select:DF (reg/v:V2DF 85 [ x ])
+  (parallel [ (const_int 0 [0])]))
+  (reg:DF 87))
+(vec_select:DF (reg/v:V2DF 85 [ x ])
+   (parallel [ (const_int 1 [0x1])])))
+
+  into standard scalar operation patterns which preserve the
+  rest of the vector:
+
+  (vec_merge:V2DF
+(vec_duplicate:V2DF
+  (op:DF (vec

Re: [Patch] PR rtl-optimization/87763 - generate more bfi instructions on aarch64

2019-02-07 Thread Wilco Dijkstra
Hi Steve,

>> After special cases you could do something like t = mask2 + (HWI_1U << 
>> shift);
>> return t == (t & -t) to check for a valid bfi.
>
> I am not sure I follow this logic and my attempts to use this did not
> work so I kept my original code.

It's similar to the initial code in aarch64_bitmask_imm, but rather than adding
the lowest bit to the value to verify it is a mask (val + (val & -val)), we use 
the
shift instead. If the shift is exactly right, it reaches the first set bit of 
the mask. 
Adding the low bit to a valid mask always results in zero or a single set bit. 
The standard idiom to check that is t == (t & -t).

>> +  "bfi\t%0, %1, 0, %P2"
>> 
>> This could emit a width that may be 32 too large in SImode if bit 31 is set
>> (there is another use of %P in aarch64.md which may have the same
>> issue).
>
> I am not sure why having bit 31 set would be a problem.  Sign
> extension?

Yes, if bit 31 is set, %P will emit 33 for a 32-bit constant which is obviously 
wrong.
Your patch avoids this for bfi by explicitly computing the correct value.

This looks good to me (and creates useful bfi's as expected), but I can't 
approve.

Wilco








[New test fortran, committed] PR 52789 gfortran sets -Wunused-parameter in the C sense as well as the Fortran sense

2019-02-07 Thread Dominique d'Humières
New test committed as obvious at revision r268656.

Dominique



Re: [PATCH] print correct array sizes in errors (PR 87996)

2019-02-07 Thread Martin Sebor

On 2/7/19 9:10 AM, Jason Merrill wrote:

On 2/5/19 4:55 PM, Martin Sebor wrote:

On 2/5/19 12:14 PM, Jason Merrill wrote:

On 2/5/19 1:46 PM, Martin Sebor wrote:

On 2/1/19 7:41 AM, Jason Merrill wrote:

On 1/31/19 5:49 PM, Martin Sebor wrote:

On 1/30/19 3:15 PM, Jason Merrill wrote:

On 1/29/19 7:15 PM, Martin Sebor wrote:

+  /* Try to convert the original SIZE to a ssizetype.  */
+  if (orig_size != error_mark_node
+  && !TYPE_UNSIGNED (TREE_TYPE (orig_size)))
+    {
+  if (TREE_CODE (size) == INTEGER_CST
+  && tree_int_cst_sign_bit (size))
+    diagsize = build_converted_constant_expr (ssizetype, size,
+  tsubst_flags_t ());
+  else if (size == error_mark_node
+   && TREE_CODE (orig_size) == INTEGER_CST
+   && tree_int_cst_sign_bit (orig_size))
+    diagsize = build_converted_constant_expr (ssizetype, 
orig_size,

+  tsubst_flags_t ());
+    }


Using build_converted_constant_expr here looks odd; that's a 
language-level notion, and we're dealing with compiler internals. 
fold_convert seems more appropriate.


Done.




+  if (TREE_CONSTANT (size))
+    {
+  if (!diagsize && TREE_CODE (size) == INTEGER_CST)
+    diagsize = size;
+    }
+  else
 size = osize;
 }

@@ -9732,15 +9758,12 @@ compute_array_index_type_loc (location_t 
name_loc,

   if (TREE_CODE (size) == INTEGER_CST)
 {
   /* An array must have a positive number of elements.  */
-  if (!valid_constant_size_p (size))
+  if (!diagsize)
+    diagsize = size;


It seems like the earlier hunk here is unnecessary; if size is an 
INTEGER_CST, it will be unchanged, and so be used for diagsize in 
the latter hunk without any changes to the earlier location. 
Actually, why not do all of the diagsize logic down here?  There 
doesn't seem to be anything above that relies on information we 
will have lost at this point.


Sure.  Done in the attached revision.



-  tree osize = size;
+  /* The original numeric size as seen in the source code after
+ any substitution and before conversion to size_t.  */
+  tree origsize = NULL_TREE;


Can't you use osize?  instantiate_non_dependent_expr doesn't do any 
actual substitution, it shouldn't change the type of the expression 
or affect whether it's an INTEGER_CST.


I went ahead and reused osize but kept the new name origsize (I assume
avoiding introducing a new variable is what you meant).  The longer
name is more descriptive and also has a comment explaining what it's
for (which is why I didn't touch osize initially -- I didn't know
enough about what it was for or how it might change).


Yep, it was saving the original "size" before conversions.  I guess 
the name change is OK, but please restore the initialization from 
"size", and drop the added comment on the call to mark_rvalue_use 
(which is to possibly replace a lambda capture with its constant value).


How about passing origsize into valid_array_size_p rather than than 
always computing diagsize in the caller?


I'm not sure I understand what you're asking for here.  diagsize
is computed from either size or origsize but valid_array_size_p
takes just one size (or type) argument.  What part do you want
to move to valid_array_size_p (and why)?  Or are you asking to
call fold_convert() in valid_array_size_p instead here?


I was thinking of adding an origsize parameter to valid_array_size_p.


The function has 6 callers and only this one would pass it this extra
argument(*).  The others pass it the array type, not size (something
I did to avoid adding an argument to it), so they would pass it null
as this new argument.

Adding an argument to the function that's unrelated to its purpose
only to simplify one caller doesn't feel right to me.

So if I'm not missing something and this was just a suggestion
to consider, I would prefer to keep the computation where it is.
I have simplified it a bit and eliminated some unnecessary logic.
Otherwise, if it's a precondition of approving the patch, please
let me know.

Attached is the updated patch with the removed comment and with
the slightly simpler logic in compute_array_index_type_loc.

Martin

[*] We only really need one bit: the signedness of the original
type.  With the simplification its value is no longer used.
PR c++/87996 - size of array is negative error when SIZE_MAX/2 < sizeof(array) <= SIZE_MAX

gcc/ChangeLog:

	PR c++/87996
	* builtins.c (max_object_size): Move from here...
	* builtins.h (max_object_size): ...and here...
	* tree.c (max_object_size): ...to here...
	* tree.h (max_object_size): ...and here.

gcc/c-family/ChangeLog:

	PR c++/87996
	* c-common.c (invalid_array_size_error): New function.
	(valid_array_size_p): Call it.  Handle size as well as type.
	* c-common.h (valid_constant_size_p): New function.
	(enum cst_size_error): New type.

gcc/cp/ChangeLog:

	PR c++/87996
	* decl.c (compute_array_inde

[PATCH] Fix more ICEs in -fsave-optimization-record (PR tree-optimization/89235)

2019-02-07 Thread David Malcolm
PR tree-optimization/89235 reports an ICE inside -fsave-optimization-record
whilst reporting the inlining chain of of the location_t in the
vect_location global.

This is very similar to PR tree-optimization/86637, fixed in r266821.

The issue is that the inlining chains are read from the location_t's
ad-hoc data, referencing GC-managed tree blocks, but the former are
not GC roots; it's simply assumed that old locations referencing dead
blocks never get used again.

The fix is to reset the "vect_location" global in more places.  Given
that is a somewhat subtle detail, the patch adds a sentinel class to
reset vect_location at the end of a scope.  Doing it as a class
simplifies the task of ensuring that the global is reset on every
exit path from a function, and also gives a good place to signpost
the above subtlety (in the documentation for the class).

The patch also adds test cases for both of the PRs mentioned above.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.

OK for trunk?

gcc/testsuite/ChangeLog:
PR tree-optimization/86637
PR tree-optimization/89235
* gcc.c-torture/compile/pr86637-1.c: New test.
* gcc.c-torture/compile/pr86637-2.c: New test.
* gcc.c-torture/compile/pr86637-3.c: New test.
* gcc.c-torture/compile/pr89235.c: New test.

gcc/ChangeLog:
PR tree-optimization/86637
PR tree-optimization/89235
* tree-vect-loop.c (optimize_mask_stores): Add an
auto_purge_vect_location sentinel to ensure that vect_location is
purged on exit.
* tree-vectorizer.c
(auto_purge_vect_location::~auto_purge_vect_location): New dtor.
(try_vectorize_loop_1): Add an auto_purge_vect_location sentinel
to ensure that vect_location is purged on exit.
(pass_slp_vectorize::execute): Likewise, replacing the manual
reset.
* tree-vectorizer.h (class auto_purge_vect_location): New class.
---
 gcc/testsuite/gcc.c-torture/compile/pr86637-1.c |  13 +++
 gcc/testsuite/gcc.c-torture/compile/pr86637-2.c | 128 
 gcc/testsuite/gcc.c-torture/compile/pr86637-3.c |  11 ++
 gcc/testsuite/gcc.c-torture/compile/pr89235.c   |  57 +++
 gcc/tree-vect-loop.c|   1 +
 gcc/tree-vectorizer.c   |  13 ++-
 gcc/tree-vectorizer.h   |  18 
 7 files changed, 239 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr86637-1.c
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr86637-2.c
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr86637-3.c
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr89235.c

diff --git a/gcc/testsuite/gcc.c-torture/compile/pr86637-1.c 
b/gcc/testsuite/gcc.c-torture/compile/pr86637-1.c
new file mode 100644
index 000..61b6381
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr86637-1.c
@@ -0,0 +1,13 @@
+/* { dg-options "-fsave-optimization-record -ftree-slp-vectorize --param 
ggc-min-expand=1 --param ggc-min-heapsize=1024" } */
+
+void
+en (void)
+{
+}
+
+void
+n4 (int zb)
+{
+  while (zb < 1)
+++zb;
+}
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr86637-2.c 
b/gcc/testsuite/gcc.c-torture/compile/pr86637-2.c
new file mode 100644
index 000..3b675ea
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr86637-2.c
@@ -0,0 +1,128 @@
+/* { dg-do compile { target fgraphite } } */
+/* { dg-options "-floop-parallelize-all -fsave-optimization-record 
-ftree-parallelize-loops=2 -ftree-slp-vectorize" } */
+
+#include 
+#include 
+
+signed char qq;
+int rm, mu, l9;
+long long unsigned int ip;
+
+void
+fi (void)
+{
+}
+
+void
+il (long long unsigned int c5)
+{
+  int *na = μ
+
+  while (l9 < 1)
+{
+  if (qq < 1)
+{
+  short int ds = 0;
+
+  if ((ip - *na - ip / c5 != 0) && (*na / ((c5 + c5) && !!ip) != 0))
+__builtin_trap ();
+
+  rm = -1 / (!!rm && !!c5);
+
+  while (qq < 1)
+{
+  ++*na;
+  ++ip;
+  if (*na < (int) ip)
+while (ds < 2)
+  {
+++qq;
+++ds;
+  }
+}
+}
+
+  ++l9;
+}
+
+  for (;;)
+{
+}
+}
+
+void
+uu (short int wk)
+{
+  int64_t tl = ip;
+
+  while (ip < 2)
+{
+  signed char vn;
+
+  for (vn = 1; vn != 0; ++vn)
+{
+  int rh;
+
+  if (qq < 1)
+{
+  while (mu < 1)
+ip = 0;
+
+  while (rm != 0)
+++rm;
+}
+
+  for (rh = 0; rh < 3; ++rh)
+{
+  int ab;
+
+  for (ab = 0; ab < 5; ++ab)
+{
+  tl -= qq;
+  wk += rh - tl;
+  ip += (ab < wk) + wk;
+}
+}
+}
+}
+}
+
+void
+im (uint8_t kt)
+{
+  int wu = 0;
+
+

Re: [PATCH] Fix PR89150, GC of tree-form bitmaps

2019-02-07 Thread Michael Ploujnikov
On 2019-02-04 11:39 a.m., Richard Biener wrote:
> On February 4, 2019 5:07:00 PM GMT+01:00, Jeff Law  wrote:
>> On 2/4/19 6:15 AM, Richard Biener wrote:
>>>
>>> When I introduced tree-form bitmaps I forgot to think about GC.
>>> The following drops the chain_prev annotation to make the marker
>>> work for trees.  I've also maked the obstack member GTY skip
>>> (and prevent bitmap_obstack from gengtype processing) because
>>> the obstack isn't used for GC allocated bitmaps.
>>>
>>> Bootstrap & regtest running on x86_64-unknown-linux-gnu.
>>>
>>> Richard.
>>>
>>> 2019-02-04  Richard Biener  
>>>
>>> PR middle-end/89150
>>> * bitmap.h (struct bitmap_obstack): Do not mark GTY.
>>> (struct bitmap_element): Drop chain_prev so we properly recurse on
>>> the prev member, supporting tree views.
>>> (struct bitmap_head): GTY skip the obstack member.
>> Was there a particular failure mode you observed or was this discovered
>> by inspection.
> 
> It was discovered via an out of tree patch, the issue is only latent on trunk 
> (together with another unfixed pch issue of bitmaps). 

My apologies for not noticing this thread/PR earlier. I originally discovered 
this issue while working on an experimental patch, but since then I've also 
come up with a test case for this specific issue. Is it worth adding it to 
trunk?

- Michael
From 9f1049a1e24cd1aa9852c51342dce0226416 Mon Sep 17 00:00:00 2001
From: Michael Ploujnikov 
Date: Thu, 7 Feb 2019 14:43:40 -0500
Subject: [PATCH] Add a test for GC marking bitmaps in tree view mode

gcc/ChangeLog:

2019-02-07  Michael Ploujnikov  

	PR middle-end/89150
	* bitmap.c (test_bitmap_tree_marking): New test.
	(NOT_NULL_OR_GARBAGE): For shortening
	test_bitmap_tree_marking.
	(bitmap_c_tests): Add test_bitmap_tree_marking.
---
 gcc/bitmap.c | 82 
 1 file changed, 82 insertions(+)

diff --git gcc/bitmap.c gcc/bitmap.c
index 5a8236de75..7e802a1600 100644
--- gcc/bitmap.c
+++ gcc/bitmap.c
@@ -2626,6 +2626,11 @@ bitmap_head::dump ()
 
 #if CHECKING_P
 
+static GTY(()) bitmap selftest_test_bitmap;
+
+/* From ggc-internal.h */
+extern bool ggc_force_collect;
+
 namespace selftest {
 
 /* Selftests for bitmaps.  */
@@ -2722,6 +2727,82 @@ test_bitmap_single_bit_set_p ()
   ASSERT_EQ (1066, bitmap_first_set_bit (b));
 }
 
+#define NOT_NULL_OR_GARBAGE(NODE) \
+  (NODE != NULL && (unsigned long)(NODE) != 0xa5a5a5a5UL)
+
+static void
+test_bitmap_tree_marking ()
+{
+  selftest_test_bitmap = BITMAP_GGC_ALLOC ();
+  bitmap_tree_view (selftest_test_bitmap);
+
+  ASSERT_TRUE (bitmap_set_bit (selftest_test_bitmap, 1*BITMAP_ELEMENT_ALL_BITS));
+  ASSERT_TRUE (bitmap_set_bit (selftest_test_bitmap, 5*BITMAP_ELEMENT_ALL_BITS));
+  ASSERT_TRUE (bitmap_set_bit (selftest_test_bitmap, 10*BITMAP_ELEMENT_ALL_BITS));
+  ASSERT_TRUE (bitmap_set_bit (selftest_test_bitmap, 15*BITMAP_ELEMENT_ALL_BITS));
+  ASSERT_TRUE (bitmap_set_bit (selftest_test_bitmap, 30*BITMAP_ELEMENT_ALL_BITS));
+  ASSERT_TRUE (bitmap_set_bit (selftest_test_bitmap, 25*BITMAP_ELEMENT_ALL_BITS));
+  ASSERT_TRUE (bitmap_set_bit (selftest_test_bitmap, 3*BITMAP_ELEMENT_ALL_BITS));
+  ASSERT_TRUE (bitmap_set_bit (selftest_test_bitmap, 4*BITMAP_ELEMENT_ALL_BITS));
+  ASSERT_TRUE (bitmap_set_bit (selftest_test_bitmap, 26*BITMAP_ELEMENT_ALL_BITS));
+  /* At this point it should look like:
+  26
+ /  \
+25  30
+   /
+  5
+ / \
+4  15
+   /  /
+  3  10
+ /
+1
+
+  */
+  ggc_force_collect = true;
+  ggc_collect ();
+  ggc_force_collect = false;
+
+  ASSERT_TRUE (NOT_NULL_OR_GARBAGE (selftest_test_bitmap->first));
+  ASSERT_TRUE (selftest_test_bitmap->first->indx == 26);
+
+  ASSERT_TRUE (NOT_NULL_OR_GARBAGE (selftest_test_bitmap->first->next));
+  ASSERT_TRUE (selftest_test_bitmap->first->next->indx == 30);
+  ASSERT_TRUE (selftest_test_bitmap->first->next->next == NULL);
+  ASSERT_TRUE (selftest_test_bitmap->first->next->prev == NULL);
+
+  ASSERT_TRUE (NOT_NULL_OR_GARBAGE (selftest_test_bitmap->first->prev));
+  ASSERT_TRUE (selftest_test_bitmap->first->prev->indx == 25);
+  ASSERT_TRUE (selftest_test_bitmap->first->prev->next == NULL);
+
+  ASSERT_TRUE (NOT_NULL_OR_GARBAGE (selftest_test_bitmap->first->prev->prev));
+  ASSERT_TRUE (selftest_test_bitmap->first->prev->prev->indx == 5);
+
+  ASSERT_TRUE (NOT_NULL_OR_GARBAGE (selftest_test_bitmap->first->prev->prev->next));
+  ASSERT_TRUE (selftest_test_bitmap->first->prev->prev->next->indx == 15);
+  ASSERT_TRUE (selftest_test_bitmap->first->prev->prev->next->next == NULL);
+
+  ASSERT_TRUE (NOT_NULL_OR_GARBAGE (selftest_test_bitmap->first->prev->prev->next->prev));
+  ASSERT_TRUE (selftest_test_bitmap->first->prev->prev->next->prev->indx == 10);
+  ASSERT_TRUE (selftest_test_bitmap->first->prev->prev->next->prev->prev == NULL);
+  ASSERT_TRUE (selftest_test_bitmap->first->prev->prev->next->prev->next == NULL);
+
+  ASSERT_TRUE (NOT_NULL_OR_GARBAGE 

Re: [PATCH] Fix PR89150, GC of tree-form bitmaps

2019-02-07 Thread Jakub Jelinek
On Thu, Feb 07, 2019 at 03:04:21PM -0500, Michael Ploujnikov wrote:
> 2019-02-07  Michael Ploujnikov  
> 
>   PR middle-end/89150
>   * bitmap.c (test_bitmap_tree_marking): New test.
>   (NOT_NULL_OR_GARBAGE): For shortening
>   test_bitmap_tree_marking.
>   (bitmap_c_tests): Add test_bitmap_tree_marking.

Could you do that instead in a plugin in the testsuite?
I mean, the patch is adding garbage collection roots, so it will not affect
just -fself-tests run, but also any time the compiler will do GC.

Jakub


Re: [PATCH] Fix more ICEs in -fsave-optimization-record (PR tree-optimization/89235)

2019-02-07 Thread Richard Biener
On February 7, 2019 9:10:15 PM GMT+01:00, David Malcolm  
wrote:
>PR tree-optimization/89235 reports an ICE inside
>-fsave-optimization-record
>whilst reporting the inlining chain of of the location_t in the
>vect_location global.
>
>This is very similar to PR tree-optimization/86637, fixed in r266821.
>
>The issue is that the inlining chains are read from the location_t's
>ad-hoc data, referencing GC-managed tree blocks, but the former are
>not GC roots; it's simply assumed that old locations referencing dead
>blocks never get used again.
>
>The fix is to reset the "vect_location" global in more places.  Given
>that is a somewhat subtle detail, the patch adds a sentinel class to
>reset vect_location at the end of a scope.  Doing it as a class
>simplifies the task of ensuring that the global is reset on every
>exit path from a function, and also gives a good place to signpost
>the above subtlety (in the documentation for the class).
>
>The patch also adds test cases for both of the PRs mentioned above.
>
>Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
>
>OK for trunk?

OK. 

Richard. 

>gcc/testsuite/ChangeLog:
>   PR tree-optimization/86637
>   PR tree-optimization/89235
>   * gcc.c-torture/compile/pr86637-1.c: New test.
>   * gcc.c-torture/compile/pr86637-2.c: New test.
>   * gcc.c-torture/compile/pr86637-3.c: New test.
>   * gcc.c-torture/compile/pr89235.c: New test.
>
>gcc/ChangeLog:
>   PR tree-optimization/86637
>   PR tree-optimization/89235
>   * tree-vect-loop.c (optimize_mask_stores): Add an
>   auto_purge_vect_location sentinel to ensure that vect_location is
>   purged on exit.
>   * tree-vectorizer.c
>   (auto_purge_vect_location::~auto_purge_vect_location): New dtor.
>   (try_vectorize_loop_1): Add an auto_purge_vect_location sentinel
>   to ensure that vect_location is purged on exit.
>   (pass_slp_vectorize::execute): Likewise, replacing the manual
>   reset.
>   * tree-vectorizer.h (class auto_purge_vect_location): New class.
>---
> gcc/testsuite/gcc.c-torture/compile/pr86637-1.c |  13 +++
>gcc/testsuite/gcc.c-torture/compile/pr86637-2.c | 128
>
> gcc/testsuite/gcc.c-torture/compile/pr86637-3.c |  11 ++
> gcc/testsuite/gcc.c-torture/compile/pr89235.c   |  57 +++
> gcc/tree-vect-loop.c|   1 +
> gcc/tree-vectorizer.c   |  13 ++-
> gcc/tree-vectorizer.h   |  18 
> 7 files changed, 239 insertions(+), 2 deletions(-)
> create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr86637-1.c
> create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr86637-2.c
> create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr86637-3.c
> create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr89235.c
>
>diff --git a/gcc/testsuite/gcc.c-torture/compile/pr86637-1.c
>b/gcc/testsuite/gcc.c-torture/compile/pr86637-1.c
>new file mode 100644
>index 000..61b6381
>--- /dev/null
>+++ b/gcc/testsuite/gcc.c-torture/compile/pr86637-1.c
>@@ -0,0 +1,13 @@
>+/* { dg-options "-fsave-optimization-record -ftree-slp-vectorize
>--param ggc-min-expand=1 --param ggc-min-heapsize=1024" } */
>+
>+void
>+en (void)
>+{
>+}
>+
>+void
>+n4 (int zb)
>+{
>+  while (zb < 1)
>+++zb;
>+}
>diff --git a/gcc/testsuite/gcc.c-torture/compile/pr86637-2.c
>b/gcc/testsuite/gcc.c-torture/compile/pr86637-2.c
>new file mode 100644
>index 000..3b675ea
>--- /dev/null
>+++ b/gcc/testsuite/gcc.c-torture/compile/pr86637-2.c
>@@ -0,0 +1,128 @@
>+/* { dg-do compile { target fgraphite } } */
>+/* { dg-options "-floop-parallelize-all -fsave-optimization-record
>-ftree-parallelize-loops=2 -ftree-slp-vectorize" } */
>+
>+#include 
>+#include 
>+
>+signed char qq;
>+int rm, mu, l9;
>+long long unsigned int ip;
>+
>+void
>+fi (void)
>+{
>+}
>+
>+void
>+il (long long unsigned int c5)
>+{
>+  int *na = μ
>+
>+  while (l9 < 1)
>+{
>+  if (qq < 1)
>+{
>+  short int ds = 0;
>+
>+  if ((ip - *na - ip / c5 != 0) && (*na / ((c5 + c5) && !!ip)
>!= 0))
>+__builtin_trap ();
>+
>+  rm = -1 / (!!rm && !!c5);
>+
>+  while (qq < 1)
>+{
>+  ++*na;
>+  ++ip;
>+  if (*na < (int) ip)
>+while (ds < 2)
>+  {
>+++qq;
>+++ds;
>+  }
>+}
>+}
>+
>+  ++l9;
>+}
>+
>+  for (;;)
>+{
>+}
>+}
>+
>+void
>+uu (short int wk)
>+{
>+  int64_t tl = ip;
>+
>+  while (ip < 2)
>+{
>+  signed char vn;
>+
>+  for (vn = 1; vn != 0; ++vn)
>+{
>+  int rh;
>+
>+  if (qq < 1)
>+{
>+  while (mu < 1)
>+ip = 0;
>+
>+  while (rm != 0)
>+++rm;
>+}
>+
>+  for (rh = 0; rh < 3; ++rh)
>+{
>+  int ab;
>+
>+  for (ab = 0; ab < 5; ++

[PATCH] i386: Use OI/TImode in *mov[ot]i_internal_avx with AVX512VL

2019-02-07 Thread H.J. Lu
OImode and TImode moves must be done in XImode to access upper 16
vector registers without AVX512VL.  With AVX512VL, we can access
upper 16 vector registers in OImode and TImode.

PR target/89229
* config/i386/i386.md (*movoi_internal_avx): Set mode to XI for
upper 16 vector registers without TARGET_AVX512VL.
(*movti_internal): Likewise.
---
 gcc/config/i386/i386.md | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index c1492363bca..e7f4b9a9c8d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1933,8 +1933,9 @@
(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
(set_attr "prefix" "vex")
(set (attr "mode")
-   (cond [(ior (match_operand 0 "ext_sse_reg_operand")
-   (match_operand 1 "ext_sse_reg_operand"))
+   (cond [(and (ior (match_operand 0 "ext_sse_reg_operand")
+(match_operand 1 "ext_sse_reg_operand"))
+   (match_test "!TARGET_AVX512VL"))
 (const_string "XI")
   (and (eq_attr "alternative" "1")
(match_test "TARGET_AVX512VL"))
@@ -2012,8 +2013,9 @@
(set (attr "mode")
(cond [(eq_attr "alternative" "0,1")
 (const_string "DI")
-  (ior (match_operand 0 "ext_sse_reg_operand")
-   (match_operand 1 "ext_sse_reg_operand"))
+  (and (ior (match_operand 0 "ext_sse_reg_operand")
+(match_operand 1 "ext_sse_reg_operand"))
+   (match_test "!TARGET_AVX512VL"))
 (const_string "XI")
   (and (eq_attr "alternative" "3")
(match_test "TARGET_AVX512VL"))
-- 
2.20.1



[PATCH] Fix ICE due to copy_reg_eh_region_note_forward (PR rtl-optimization/89234)

2019-02-07 Thread Jakub Jelinek
Hi!

The following testcase ICEs on ppc64le.  The problem is that
copy_reg_eh_region_note_* functions accept either some instruction, or
REG_EH_REGION note directly.  To differentiate between those it uses INSN_P
test (and returns early if the insn doesn't contain any REG_EH_REGION
notes).  If the function is called on a rtx_insn * that isn't INSN_P, like
on the testcase on a NOTE, it assumes it must be REG_EH_REGION note, uses
XEXP (note, 0) on it, which is actually PREV_INSN in this case and stores
an instruction (JUMP_INSN in this case) into REG_EH_REGION notes it creates.

I believe we should treat rtx_insn * that aren't INSN_P like instructions
that don't have any REG_EH_REGION notes.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-02-07  Jakub Jelinek  

PR rtl-optimization/89234
* except.c (copy_reg_eh_region_note_forward): Return if note_or_insn
is a NOTE, CODE_LABEL etc. - rtx_insn * other than INSN_P.
(copy_reg_eh_region_note_backward): Likewise.

* g++.dg/ubsan/pr89234.C: New test.

--- gcc/except.c.jj 2019-01-10 11:43:14.387377695 +0100
+++ gcc/except.c2019-02-07 15:11:27.756869475 +0100
@@ -1756,6 +1756,8 @@ copy_reg_eh_region_note_forward (rtx not
   if (note == NULL)
return;
 }
+  else if (is_a  (note_or_insn))
+return;
   note = XEXP (note, 0);
 
   for (insn = first; insn != last ; insn = NEXT_INSN (insn))
@@ -1778,6 +1780,8 @@ copy_reg_eh_region_note_backward (rtx no
   if (note == NULL)
return;
 }
+  else if (is_a  (note_or_insn))
+return;
   note = XEXP (note, 0);
 
   for (insn = last; insn != first; insn = PREV_INSN (insn))
--- gcc/testsuite/g++.dg/ubsan/pr89234.C.jj 2019-02-07 17:12:02.081024666 
+0100
+++ gcc/testsuite/g++.dg/ubsan/pr89234.C2019-02-07 17:08:55.026097949 
+0100
@@ -0,0 +1,11 @@
+// PR rtl-optimization/89234
+// { dg-do compile { target dfp } }
+// { dg-options "-O2 -fnon-call-exceptions -fsanitize=null" }
+
+typedef float __attribute__((mode (SD))) _Decimal32;
+
+void
+foo (_Decimal32 *b, _Decimal32 c)
+{
+  *b = c + 1.5;
+}

Jakub


[PATCH] Narrow ARRAY*_REF indexes to sizetype for gimple (PR tree-optimization/89223)

2019-02-07 Thread Jakub Jelinek
Hi!

As mentioned in the PR, given that during expansion we expand ARRAY_REFs
using get_inner_reference that casts ARRAY_*REF indexes to sizetype, all the
bits above sizetype are ignored (whether one uses long long indexes on
32-bit targets or __int128 indexes on 64-bit ones), so I think it is
desirable to make that visible already for GIMPLE optimizations, where we
can narrow other arithmetic stmts feeding those indexes etc., it could help
vectorization (e.g. gather/scatter), etc.  Of course, fixing tree-data-ref.c
etc. to cope with wider constants or chrecs is desirable as well.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-02-06  Jakub Jelinek  

PR tree-optimization/89223
* gimplify.c (gimplify_compound_lval): Narrow ARRAY*_REF indexes wider
than sizetype to sizetype.

* gcc.c-torture/execute/pr89223.c: New test.

--- gcc/gimplify.c.jj   2019-01-28 23:30:53.199762928 +0100
+++ gcc/gimplify.c  2019-02-06 17:15:35.368976785 +0100
@@ -2977,6 +2977,12 @@ gimplify_compound_lval (tree *expr_p, gi
 
   if (TREE_CODE (t) == ARRAY_REF || TREE_CODE (t) == ARRAY_RANGE_REF)
{
+ /* Expansion will cast the index to sizetype, so any excess bits
+aren't useful for optimizations.  */
+ if (!error_operand_p (TREE_OPERAND (t, 1))
+ && (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (t, 1)))
+ > TYPE_PRECISION (sizetype)))
+   TREE_OPERAND (t, 1) = fold_convert (sizetype, TREE_OPERAND (t, 1));
  /* Gimplify the dimension.  */
  if (!is_gimple_min_invariant (TREE_OPERAND (t, 1)))
{
--- gcc/testsuite/gcc.c-torture/execute/pr89223.c.jj2019-02-07 
18:09:39.869088769 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr89223.c   2019-02-07 
18:09:12.359543231 +0100
@@ -0,0 +1,27 @@
+/* PR tree-optimization/89223 */
+/* { dg-do run { target int128 } } */
+
+int a[9] = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+unsigned __int128 b;
+
+__attribute__((noipa)) void
+foo (void)
+{
+  for (b = (((unsigned __int128) 4) << 64) + 4; b != 0; b -= unsigned 
__int128) 1) << 64) + 1))
+a[b] = a[b + b];
+}
+
+int
+main ()
+{
+  int i;
+  if (sizeof (__UINTPTR_TYPE__) * __CHAR_BIT__ != 64
+  || sizeof (__SIZE_TYPE__) * __CHAR_BIT__ != 64
+  || sizeof (__int128) * __CHAR_BIT__ != 128)
+return 0;
+  foo ();
+  for (i = 0; i < 9; ++i)
+if (a[i] != (((1 << i) & 0x16) != 0 ? 9 : i == 3 ? 7 : i + 1))
+  __builtin_abort ();
+  return 0;
+}

Jakub


[PATCH] Fix up pre_and_rev_post_order_compute_fn

2019-02-07 Thread Jakub Jelinek
Hi!

I've noticed pre_and_rev_post_order_compute_fn is one of (apparently many)
functions that take a struct function * argument and accept any function, as
long as it is cfun.  For a patch I've been working on I actually need it
to handle other functions as well and in this case it is trivial to fix.
I've seen several others e.g. in graph.c, though some of those are harder to
deal with, as some there are calls to functions that don't have a variant
with struct function * argument and assume cfun implicitly.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-02-07  Jakub Jelinek  

* cfganal.c (pre_and_rev_post_order_compute_fn): Use fn instead of
cfun everywhere.

--- gcc/cfganal.c.jj2019-01-01 12:37:17.915962514 +0100
+++ gcc/cfganal.c   2019-02-07 20:47:29.035520143 +0100
@@ -951,10 +951,10 @@ pre_and_rev_post_order_compute_fn (struc
   bool include_entry_exit)
 {
   int pre_order_num = 0;
-  int rev_post_order_num = n_basic_blocks_for_fn (cfun) - 1;
+  int rev_post_order_num = n_basic_blocks_for_fn (fn) - 1;
 
   /* Allocate stack for back-tracking up CFG.  */
-  auto_vec stack (n_basic_blocks_for_fn (cfun) + 1);
+  auto_vec stack (n_basic_blocks_for_fn (fn) + 1);
 
   if (include_entry_exit)
 {
@@ -968,7 +968,7 @@ pre_and_rev_post_order_compute_fn (struc
 rev_post_order_num -= NUM_FIXED_BLOCKS;
 
   /* Allocate bitmap to track nodes that have been visited.  */
-  auto_sbitmap visited (last_basic_block_for_fn (cfun));
+  auto_sbitmap visited (last_basic_block_for_fn (fn));
 
   /* None of the nodes in the CFG have been visited yet.  */
   bitmap_clear (visited);

Jakub


Re: [C++ Patch] PR 88986 ("[7/8/9 Regression] ICE: tree check: expected tree that contains 'decl minimal' structure, have 'error_mark' in member_vec_binary_search, at cp/name-lookup.c:1136")

2019-02-07 Thread Jason Merrill

On 2/5/19 4:39 AM, Paolo Carlini wrote:

Hi,

On 04/02/19 17:48, Jason Merrill wrote:
On Mon, Feb 4, 2019 at 11:00 AM Paolo Carlini 
 wrote:

On 04/02/19 15:47, Jason Merrill wrote:

On 2/1/19 3:52 PM, Paolo Carlini wrote:

Hi,

I think that this ICE on invalid (and valid, for c++17+) can be in
fact avoided by accepting in make_typename_type a TYPE_PACK_EXPANSION
as context, thus by not triggering the "‘T ...’ is not a class"
error. Not sure if a better fix would be something more general.
Note, anyway, that we are asserting TYPE_P (context) thus
TYPE_PACK_EXPANSIONs definitely get through beyond MAYBE_CLASS_TYPE_P.

The testcase should test that the using actually works, i.e. imports a
type from a base class.

Uhm, if I change the testcase to something like:

struct B { typedef int type; };

template struct C : T... {
    using typename T::type ...;
    void f() { type value; }
};

template class C;

we get a "sorry, unimplemented: use of ‘type_pack_expansion’ in
template" for value

I suspected that would happen.


which, arguably, is better than the current ICE,
but I'm not sure if we are close to completing the implementation of
this / we even want to attempt that at this Stage?!?

Well, I'd look at how we implement the equivalent for e.g. a variable:

struct A { static int i; };
template  struct B: T... {
   using T::i ...;
};
auto x = B::i; // works

and try to use that code path for the typename case as well.


Yes. Note that, AFAICS, the code you added to implement P0195R2 works 
fine for typename too. The problem happens when we actually encounter 
inside the class something like:


     void f() { type value; }

which, so to speak, appears to expand to a set of types: 'type' is a 
TYPENAME_TYPE which has a TYPE_PACK_EXPANSION as TYPE_CONTEXT.


I wonder about replacing uses of 'type' with a TYPENAME_TYPE with the 
current class as TYPE_CONTEXT?  But your approach is OK, too.


Jason


Re: [PATCH] print correct array sizes in errors (PR 87996)

2019-02-07 Thread Jason Merrill

On 2/7/19 1:57 PM, Martin Sebor wrote:

+  /* The original numeric size as seen in the source code after
+ any substitution and before conversion to size_t.  */


I don't think this should mention substitution.  With that tweak the C++ 
changes are OK.


Jason



C++ PATCH for c++/89217 - ICE with list-initialization in range-based for loop

2019-02-07 Thread Marek Polacek
Since r268321 we can call digest_init even in a template, when the compound
literal isn't instantiation-dependent.  Consequently, when we get to
case RANGE_FOR_STMT in tsubst_expr, RANGE_FOR_EXPR might already have been
digested, as in this case, where before digesting it was

  {*((struct S *) this)->r}

and now it's

  TARGET_EXPR r}>

(that's correct).  Now, in tsubst_expr, we recurse on the latter:
17095 expr = RECUR (RANGE_FOR_EXPR (t));
and since the expression contains a COMPONENT_REF, we end up calling
finish_non_static_data_member which calls build_class_member_access_expr
with preserve_reference=false.  Thus, after we've tsubst'd the RANGE_FOR_EXPR,
we have

  TARGET_EXPR r}>

Nevermind those casts, but "(struct R *) *((struct S *) this)->r" causes
problems later in cp_fold -> cp_convert_to_pointer because we're trying to
convert "*((struct S *) this)->r" of type R to R *.  That errors and the
error_mark_node causes grief.

My first attempt was to handle this in tsubst_copy_and_build's case
COMPONENT_REF, after the call to finish_non_static_data_member, but that
breaks -- we can't have a LHS of a MODIFY_EXPR of a reference type.  So it
seems this needs to be handled closer to where it actually fails, for instance
in ocp_convert.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2019-02-07  Marek Polacek  

PR c++/89217 - ICE with list-initialization in range-based for loop.
* cvt.c (ocp_convert): Unwrap REFERENCE_REF_P in a COMPONENT_REF.

* g++.dg/cpp0x/range-for37.C: New test.

diff --git gcc/cp/cvt.c gcc/cp/cvt.c
index 82a44f353c7..92677cff0c5 100644
--- gcc/cp/cvt.c
+++ gcc/cp/cvt.c
@@ -857,7 +857,15 @@ ocp_convert (tree type, tree expr, int convtype, int flags,
   return ignore_overflows (converted, e);
 }
   if (INDIRECT_TYPE_P (type) || TYPE_PTRMEM_P (type))
-return cp_convert_to_pointer (type, e, dofold, complain);
+{
+  /* Undo what finish_non_static_data_member might have done, i.e.
+turning e.g. (R *)((S *)this)->r into (R *)*((S *)this)->r,
+rendering the conversion invalid.  */
+  if (REFERENCE_REF_P (e)
+ && TREE_CODE (TREE_OPERAND (e, 0)) == COMPONENT_REF)
+   e = TREE_OPERAND (e, 0);
+  return cp_convert_to_pointer (type, e, dofold, complain);
+}
   if (code == VECTOR_TYPE)
 {
   tree in_vtype = TREE_TYPE (e);
diff --git gcc/testsuite/g++.dg/cpp0x/range-for37.C 
gcc/testsuite/g++.dg/cpp0x/range-for37.C
new file mode 100644
index 000..d5c7c091d96
--- /dev/null
+++ gcc/testsuite/g++.dg/cpp0x/range-for37.C
@@ -0,0 +1,24 @@
+// PR c++/89217
+// { dg-do compile { target c++11 } }
+
+struct R {};
+
+struct C
+{
+R* begin() const { return &r; }
+R* end() const { return &r; }
+
+R& r;
+};
+
+struct S
+{
+void f1() { f2(); }
+R& r;
+
+template
+void f2()
+{
+for (auto i : C{r}) {}
+}
+};


Re: [PATCH] print correct array sizes in errors (PR 87996)

2019-02-07 Thread Martin Sebor

On 2/7/19 4:00 PM, Jason Merrill wrote:

On 2/7/19 1:57 PM, Martin Sebor wrote:

+  /* The original numeric size as seen in the source code after
+ any substitution and before conversion to size_t.  */


I don't think this should mention substitution.  With that tweak the C++ 
changes are OK.


Does someone still need to approve the rest?  Those changes are
trivial: they just move max_object_size from builtins.{c,h} to
tree.{c.h}.

The change to c-parser.c was accidental and wasn't meant to be
included it in the last diff.  I attach yet another update with
that bit removed (and the tweak comment above).

Martin
PR c++/87996 - size of array is negative error when SIZE_MAX/2 < sizeof(array) <= SIZE_MAX

gcc/ChangeLog:

	PR c++/87996
	* builtins.c (max_object_size): Move from here...
	* builtins.h (max_object_size): ...and here...
	* tree.c (max_object_size): ...to here...
	* tree.h (max_object_size): ...and here.

gcc/c-family/ChangeLog:

	PR c++/87996
	* c-common.c (invalid_array_size_error): New function.
	(valid_array_size_p): Call it.  Handle size as well as type.
	* c-common.h (valid_constant_size_p): New function.
	(enum cst_size_error): New type.

gcc/cp/ChangeLog:

	PR c++/87996
	* decl.c (compute_array_index_type_loc): Preserve signed sizes
	for diagnostics.  Call valid_array_size_p instead of error.
	* init.c (build_new_1): Compute size for diagnostic.  Call
	invalid_array_size_error
	(build_new): Call valid_array_size_p instead of error.

gcc/testsuite/ChangeLog:

	PR c++/87996
	* c-c++-common/array-5.c: New test.
	* c-c++-common/pr68107.c: Adjust text of diagnostics.
	* g++.dg/init/new38.C: Same.
	* g++.dg/init/new43.C: Same.
	* g++.dg/init/new44.C: Same.
	* g++.dg/init/new46.C: Same.
	* g++.dg/other/large-size-array.C: Same.
	* g++.dg/other/new-size-type.C: Same.
	* g++.dg/template/array30.C: Same.
	* g++.dg/template/array32.C: New test.
	* g++.dg/template/dependent-name3.C: Adjust.
	* gcc.dg/large-size-array-3.c: Same.
	* gcc.dg/large-size-array-5.c: Same.
	* gcc.dg/large-size-array.c: Same.
	* g++.old-deja/g++.brendan/array1.C: Same.
	* g++.old-deja/g++.mike/p6149.C: Same.

Index: gcc/builtins.c
===
--- gcc/builtins.c	(revision 268547)
+++ gcc/builtins.c	(working copy)
@@ -11210,12 +11210,3 @@ target_char_cst_p (tree t, char *p)
   *p = (char)tree_to_uhwi (t);
   return true;
 }
-
-/* Return the maximum object size.  */
-
-tree
-max_object_size (void)
-{
-  /* To do: Make this a configurable parameter.  */
-  return TYPE_MAX_VALUE (ptrdiff_type_node);
-}
Index: gcc/builtins.h
===
--- gcc/builtins.h	(revision 268547)
+++ gcc/builtins.h	(working copy)
@@ -150,6 +150,5 @@ extern internal_fn replacement_internal_fn (gcall
 
 extern void warn_string_no_nul (location_t, const char *, tree, tree);
 extern tree unterminated_array (tree, tree * = NULL, bool * = NULL);
-extern tree max_object_size ();
 
 #endif /* GCC_BUILTINS_H */
Index: gcc/c-family/c-common.c
===
--- gcc/c-family/c-common.c	(revision 268547)
+++ gcc/c-family/c-common.c	(working copy)
@@ -8231,29 +8231,82 @@ reject_gcc_builtin (const_tree expr, location_t lo
   return false;
 }
 
+/* Issue an ERROR for an invalid SIZE of array NAME which is null
+   for unnamed arrays.  */
+
+void
+invalid_array_size_error (location_t loc, cst_size_error error,
+			  const_tree size, const_tree name)
+{
+  tree maxsize = max_object_size ();
+  switch (error)
+{
+case cst_size_negative:
+  if (name)
+	error_at (loc, "size %qE of array %qE is negative",
+		  size, name);
+  else
+	error_at (loc, "size %qE of array is negative",
+		  size);
+  break;
+case cst_size_too_big:
+  if (name)
+	error_at (loc, "size %qE of array %qE exceeds maximum "
+		  "object size %qE", size, name, maxsize);
+  else
+	error_at (loc, "size %qE of array exceeds maximum "
+		  "object size %qE", size, maxsize);
+  break;
+case cst_size_overflow:
+  if (name)
+	error_at (loc, "size of array %qE exceeds maximum "
+		  "object size %qE", name, maxsize);
+  else
+	error_at (loc, "size of array exceeds maximum "
+		  "object size %qE", maxsize);
+  break;
+default:
+  gcc_unreachable ();
+}
+}
+
 /* Check if array size calculations overflow or if the array covers more
than half of the address space.  Return true if the size of the array
-   is valid, false otherwise.  TYPE is the type of the array and NAME is
-   the name of the array, or NULL_TREE for unnamed arrays.  */
+   is valid, false otherwise.  T is either the type of the array or its
+   size, and NAME is the name of the array, or null for unnamed arrays.  */
 
 bool
-valid_array_size_p (location_t loc, tree type, tree name, bool complain)
+valid_array_size_p (location_t loc, const_tree t, tree name, bool complain)
 {
-  if (type != error_mark_node
-  && COM

Re: [PATCH] Integration of parallel standard algorithms for c++17

2019-02-07 Thread Thomas Rodgers
> We'll need to add a copy of the LICENSE.TXT file to our sources, since
> it's referred to by the comments at the top of each PSTL file.
>

Right, we were going to discuss where the "right place" under
libstdc++-v3/ would be for that.

As for the rest, noted, I'll add them to the list for the next go.

Jonathan Wakely writes:

> On 31/01/19 21:08 -0800, Thomas Rodgers wrote:
>>Update C++17 parallel algorithms to LLVM/MIT licensed upstream sources
>
> Some lines in bits/c++config.h need to be split before 80 columns
> (with a backslash if the split is in the middle of a preprocessor
> condition obviously).
>
> There are loads of very long lines in the actual PSTL headers too, but
> I think we need to keep them similar to the upstream headers to aid
> merging, so can't gratuitously reformat them. I'm assuming that
> doesn't apply to  since that's our header and any
> changes will need to be manually applied anyway, right?
>
> We'll need to add a copy of the LICENSE.TXT file to our sources, since
> it's referred to by the comments at the top of each PSTL file.
>
> Typo in include/Makefile.am: ${pstl_srcdir}/memoy_impl.h
> (Which will require regenerating include/Makefile.in).
>
> The __cpp_lib_parallel_algorithm macro needs to be (conditionally)
> defined in  as well as  and .
>
> The namespaces par_backend and unseq_backend need to be uglified.
>
> ALL the names in the __pstl::internal namespace need to be uglified:
> brick_any_of
> pattern_any_of
> for_each_n_it_serial
> brick_walk1
> pattern_walk1
> etc.
>
> I see quite a few calls to functions that should probably be qualified
> to prevent ADL, but that can be fixed later (and upstream first):
>
>   return brick_count(...
>   return except_handler([&]() { ...
>
>
> The DejaGnu directives in the pstl tests need to be in this order:
>
> // { dg-options "-std=gnu++17 -ltbb" }
> // { dg-do run { target c++17 } }
> // { dg-require-effective-target tbb-backend }
>
> Currently the dg-require-effective-target comes before the dg-do
> line, so doesn't work. DejaGnu processes the lines in order. When it
> sees the dg-do for target c++17 it overrides the effect of any earlier
> dg-require-effective-target.
>
> I fixed that locally like so:
> find testsuite/20_util/specialized_algorithms/pstl/ 
> testsuite/25_algorithms/pstl/ testsuite/26_numerics/pstl/ -name \*.cc | xargs 
> sed -i '/dg-require-eff/{h;d};/dg-do/{p;x}'
> And now I don't get FAIL results on a system with no TBB installed
> (which is great). I also get no FAIL results on a system with TBB
> installed (also great).
>
>
> There are two copies (with slight differences) of each of the
> uninitialized_xxx tests:
>
> libstdc++-v3/testsuite/20_util/specialized_algorithms/pstl/specialized.algorithms/uninitialized_construct.cc
> |  128 +++
> libstdc++-v3/testsuite/20_util/specialized_algorithms/pstl/specialized.algorithms/uninitialized_copy_move.cc
> |  143 
> libstdc++-v3/testsuite/20_util/specialized_algorithms/pstl/specialized.algorithms/uninitialized_fill_destroy.cc
>  |  103 ++
> libstdc++-v3/testsuite/20_util/specialized_algorithms/pstl/uninitialized_construct.cc
>|  132 
> libstdc++-v3/testsuite/20_util/specialized_algorithms/pstl/uninitialized_copy_move.cc
>|  154 +
> libstdc++-v3/testsuite/20_util/specialized_algorithms/pstl/uninitialized_fill_destroy.cc
> |  104 ++
>
> That's all for now, but I'll keep making passes over it, going into
> more detail ...



Re: [PATCH, fortran ieee]: PR 88678, Many gfortran.dg/ieee/ieee_X.f90 test cases fail starting with r267465

2019-02-07 Thread Steve Ellcey
On Thu, 2019-01-31 at 08:46 +0100, Uros Bizjak wrote:
> On Wed, Jan 30, 2019 at 9:51 PM Janne Blomqvist
>
> > This seems to change the only user of support_fpu_trap() that is
> > different from support_fpu_flag(), so with this change one could
> > remove support_fpu_trap() entirely and modify all callers (since
> > it's an internal function it's not used outside libgfortran) to
> > call support_fpu_flag() directly. Otherwise Ok.
> 
> Some targets only support IEEE flags (so, flags in some FP status
> register), but not exceptions (traps) based on these flags that halt
> the program. Currently, fpu-glibc.h assumes that all flags can
> generate exceptions (that is true for the current set of gfortran
> targets), but some future target wants to return 0 from
> support_fpu_trap.
> 
> Uros.

Is this actually true for all existing gfortran targets?  Specifically
I am wondering about Arm and Aarch64.  PR 78314 says that ARM trapping
FPU exceptions are optional.

I am currently seeing gfortran.dg/ieee/ieee_6.f90 fail on my Aarch64
ThunderX box.  I wonder if we should have an Arm/Aarch64 specific
version of the fpu header file (fpu-arm.h) that would use the previous 
version of support_fpu_trap.

Steve Ellcey
sell...@marvell.com



[RS6000] Don't support inline PLT for ABI_V4 bss-plt

2019-02-07 Thread Alan Modra
Inline PLT calls need PLT to be an array of addresses.  bss-plt works
differently.

Bootstrap and regression test on powerpc64-linux biarch in progress.
OK assuming no regressions?

* config/rs6000/rs6000.c (rs6000_longcall_ref): Don't use
inline plt for ABI_V4 bss-plt.
(rs6000_call_sysv): Likewise.

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 711278c7422..cced90bb518 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -32819,7 +32819,8 @@ rs6000_longcall_ref (rtx call_ref, rtx arg)
 }
 
   if (HAVE_AS_PLTSEQ
-  && (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4))
+  && (DEFAULT_ABI == ABI_ELFv2
+ || (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT)))
 {
   rtx base = const0_rtx;
   int regno;
@@ -37981,7 +37982,7 @@ rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, 
rtx cookie)
   func = rs6000_longcall_ref (func_desc, tlsarg);
   /* If the longcall was implemented using PLT16 relocs, then r11
 needs to be valid at the call for lazy linking.  */
-  if (HAVE_AS_PLTSEQ)
+  if (HAVE_AS_PLTSEQ && REGNO (func) == 11)
abi_reg = func;
 }
 
@@ -37994,7 +37995,7 @@ rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, 
rtx cookie)
 calls via LR, so move the address there.  Needed to mark
 this insn for linker plt sequence editing too.  */
   func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
-  if (HAVE_AS_PLTSEQ
+  if (HAVE_AS_PLTSEQ && REGNO (func) == 11
  && GET_CODE (func_desc) == SYMBOL_REF)
{
  rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
@@ -38051,7 +38052,7 @@ rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx 
tlsarg, rtx cookie)
   func = rs6000_longcall_ref (func_desc, tlsarg);
   /* If the longcall was implemented using PLT16 relocs, then r11
 needs to be valid at the call for lazy linking.  */
-  if (HAVE_AS_PLTSEQ)
+  if (HAVE_AS_PLTSEQ && REGNO (func) == 11)
abi_reg = func;
 }
 
@@ -38063,7 +38064,7 @@ rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx 
tlsarg, rtx cookie)
   /* Indirect sibcalls must go via CTR.  Needed to mark
 this insn for linker plt sequence editing too.  */
   func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
-  if (HAVE_AS_PLTSEQ
+  if (HAVE_AS_PLTSEQ && REGNO (func) == 11
  && GET_CODE (func_desc) == SYMBOL_REF)
{
  rtvec v = gen_rtvec (3, func, func_desc, tlsarg);

-- 
Alan Modra
Australia Development Lab, IBM


[RS6000] Correct save_reg_p

2019-02-07 Thread Alan Modra
Fixes lack of r30 save/restore on

// -m32 -fpic -ftls-model=initial-exec
__thread char* p;
char** f1 (void) { return &p; }

and

// -m32 -fpic -msecure-plt
extern int foo (int);
int f1 (int x) { return foo (x); }

These are both caused by save_reg_p returning false when the pic
offset table reg (r30 for ABI_V4) was used, due to the logic not
exactly matching that in rs6000_emit_prologue to set up r30.  I've
simplified the ABI_V4 logic down to df_regs_ever_live_p since that is
obviously correct for all ABIs.

I also noticed that save_reg_p isn't following the comment regarding
calls_eh_return (since svn 267049, git 0edf78b1b2a0), and the comment
needs tweaking too.  For why the revised comment is correct, grep for
saves_all_registers in lra.c, and yes, we do want to save the pic
offset table reg for eh_return.

Bootstrap and regression test on powerpc64-linux biarch in progress.
OK assuming no regressions?  Segher this patch supercedes
https://gcc.gnu.org/ml/gcc-patches/2019-02/msg00307.html as there's no
need to set crtl->uses_pic_offset_table for those cases now with the
df_regs_ever_live_p test in save_reg_p.

PR target/88343
* config/rs6000/rs6000.c (save_reg_p): Correct calls_eh_return
case.  Match logic in rs6000_emit_prologue emitting pic_offset_table
setup, simplifying ABI_V4 case to df_regs_ever_live_p.

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index cced90bb518..9933d4443f7 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -24013,18 +24013,26 @@ save_reg_p (int reg)
 
   if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
 {
+  if (df_regs_ever_live_p (reg))
+   return true;
+
   /* When calling eh_return, we must return true for all the cases
 where conditional_register_usage marks the PIC offset reg
-call used.  */
+call used or fixed.  */
+  if (crtl->calls_eh_return
+ && ((DEFAULT_ABI == ABI_V4 && flag_pic)
+ || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
+ || (TARGET_TOC && TARGET_MINIMAL_TOC)))
+   return true;
+
   if (TARGET_TOC && TARGET_MINIMAL_TOC
- && (crtl->calls_eh_return
- || df_regs_ever_live_p (reg)
- || !constant_pool_empty_p ()))
+ && !constant_pool_empty_p ())
return true;
 
-  if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
+  if (DEFAULT_ABI == ABI_DARWIN
  && flag_pic && crtl->uses_pic_offset_table)
return true;
+  return false;
 }
 
   return !call_used_regs[reg] && df_regs_ever_live_p (reg);

-- 
Alan Modra
Australia Development Lab, IBM


Re: [PATCH] print correct array sizes in errors (PR 87996)

2019-02-07 Thread Jason Merrill

On 2/7/19 6:27 PM, Martin Sebor wrote:

On 2/7/19 4:00 PM, Jason Merrill wrote:

On 2/7/19 1:57 PM, Martin Sebor wrote:

+  /* The original numeric size as seen in the source code after
+ any substitution and before conversion to size_t.  */


I don't think this should mention substitution.  With that tweak the 
C++ changes are OK.


Does someone still need to approve the rest?  Those changes are
trivial: they just move max_object_size from builtins.{c,h} to
tree.{c.h}.

The change to c-parser.c was accidental and wasn't meant to be
included it in the last diff.  I attach yet another update with
that bit removed (and the tweak comment above).


OK.

Jason



Re: [REVISED PATCH 5/9]: C++ P0482R5 char8_t: Standard library support

2019-02-07 Thread Tom Honermann

On 2/7/19 4:44 AM, Jonathan Wakely wrote:

On 23/12/18 21:27 -0500, Tom Honermann wrote:
Attached is a revised patch that addresses changes in P0482R6.  
Changes from the prior patch include:

- Updated the value of the __cpp_char8_t feature test macro to 201811.

Tested on x86_64-linux.


Thanks, Tom, this is great work!

The front-end changes for char8_t went in recently, and I'm finally
ready to commit the library parts.

Great!

There's one big problem I found in
this patch, which is that the new numeric_limits
specialization uses constexpr unconditionally. That fails if 
is compiled using options like -std=c++98 -fno-char8_t because the
specialization will be used, but the constexpr keyword isn't allowed.
That's easily fixed by replacing the keyword with _GLIBCXX_CONSTEXPR.
Hmm, the code for the char8_t specialization was copied from the 
char16_t specialization which also uses constexpr unconditionally (but 
is guarded by a C++11+ requirement).  The char8_t specialization must be 
elided when the compiler is invoked with -std=c++98 -fno-char8_t (since 
the char8_t type doesn't exist then).  The _GLIBCXX_USE_CHAR8_T guard 
doesn't suffice for this? _GLIBCXX_USE_CHAR8_T should only be defined if 
__cpp_char8_t is defined; and that should only be defined if -fchar8_t 
or -std=c++2a is specified.  Or perhaps you intended -std=c++98 
-fchar8_t?  I agree in that case that use of _GLIBCXX_CONSTEXPR is 
necessary.


The other way to solve that problem would be for the compiler to give
an error if -fchar8_t is used with C++98, but I see no fundamental
reason that combination of options shouldn't be allowed. We can
support it in the library by using the macro.

Agreed.


As discussed in San Diego, the other change needed is to add the
abi_tag attribute to the new versions of path::u8string and
path::generic_u8string, so that the mangling is different when its
return type is different:

#ifdef _GLIBCXX_USE_CHAR8_T
   __attribute__((__abi_tag__("__u8")))
   std::u8string  u8string() const;
#else
   std::string    u8string() const;
#endif // _GLIBCXX_USE_CHAR8_T

Otherwise we get ODR violations when linking objects compiled
with -fchar8_t enabled to objects with it disabled (e.g. linking
-std=c++17 objects to -std=c++2a objects, which needs to work).


Are ODR violations bad? :)



I suggest "__u8" as the name of the ABI tag, but I'm open to other
suggestions. "__char8_t" is a bit long and verbose. "__cxx20" would be
consistent with "__cxx11" used for the new ABI introduced in GCC 5 but
it regularly confuses people who think it is coupled to the -std=c++11
option (and so don't understand why they still see it for -std=c++14).
I have no preference or alternative suggestions here.  Had I recognized 
the issue, I would have asked you what to do about it :)


Also, I see that you've made changes to  (to
add the experimental::u8string_view typedef) and to
std::experimental::path (to change the return type of u8string and
generic_u8string).

The former change is fairly harmless; it only adds a typedef, albeit
one which is not a reserved name in C++14/C++17 and so should be
available for users to define as a macro. Maybe prior to C++2a we
should only define it when GNU extensions are enabled (i.e. when using
-std=gnu++14 not -std=c++14):

#if defined _GLIBCXX_USE_CHAR8_T \
 && (__cplusplus > 201703L || !defined __STRICT_ANSI__)
 using u8string_view = basic_string_view;
#endif

That makes sense.


Changing the return type of experimental::path members concerns me
more. That's a published TS which is not going to be revised, and it's
not obvious to me that users would want the change in semantics. If
somebody is still using the Filesystem TS in C++2a code, they're
probably not expecting it to change. If they need to update their code
for C++2a they might as well just use std::filesystem, and so having
char8_t support in std::experimental::filesystem isn't clearly useful.

I agree.  I added the support to the experimental implementations more 
out of a desire to be complete and to remove any potential barriers to 
use of -fchar8_t than because I felt the changes were really necessary.  
I would be perfectly fine with skipping the updates to the experimental 
libraries completely.


Tom.



Re: [REVISED PATCH 7/9]: C++ P0482R5 char8_t: New standard library tests

2019-02-07 Thread Tom Honermann

On 2/7/19 4:54 AM, Jonathan Wakely wrote:

On 23/12/18 21:27 -0500, Tom Honermann wrote:
Attached is a revised patch that addresses changes in P0482R6.  
Changes from the prior patch include:

- Updated the value of the __cpp_char8_t feature test macro to 201811.

Tested on x86_64-linux.


There are quite a few additional changes needed to make the testsuite
pass cleanly with non-default options, e.g. when running it with
RUNTESTFLAGS=--target_board=unix/-fchar8_t/-fno-inline I see these
failures:
I remember thinking that I had to deal with this at one point.  It seems 
I then forgot about it.


FAIL: 21_strings/basic_string/literals/types.cc (test for excess errors)
FAIL: 21_strings/basic_string/literals/values.cc (test for excess errors)
UNRESOLVED: 21_strings/basic_string/literals/values.cc compilation 
failed to produce executable
FAIL: 21_strings/basic_string_view/literals/types.cc (test for excess 
errors)
FAIL: 21_strings/basic_string_view/literals/values.cc (test for excess 
errors)
UNRESOLVED: 21_strings/basic_string_view/literals/values.cc 
compilation failed to produce executable

FAIL: 22_locale/codecvt/char16_t.cc (test for excess errors)
UNRESOLVED: 22_locale/codecvt/char16_t.cc compilation failed to 
produce executable

FAIL: 22_locale/codecvt/char32_t.cc (test for excess errors)
UNRESOLVED: 22_locale/codecvt/char32_t.cc compilation failed to 
produce executable

FAIL: 22_locale/codecvt/codecvt_utf8/79980.cc (test for excess errors)
UNRESOLVED: 22_locale/codecvt/codecvt_utf8/79980.cc compilation failed 
to produce executable
FAIL: 22_locale/codecvt/codecvt_utf8/wchar_t/1.cc (test for excess 
errors)
UNRESOLVED: 22_locale/codecvt/codecvt_utf8/wchar_t/1.cc compilation 
failed to produce executable

FAIL: 22_locale/codecvt/utf8.cc (test for excess errors)
UNRESOLVED: 22_locale/codecvt/utf8.cc compilation failed to produce 
executable

FAIL: 22_locale/conversions/string/2.cc (test for excess errors)
UNRESOLVED: 22_locale/conversions/string/2.cc compilation failed to 
produce executable

FAIL: 22_locale/conversions/string/3.cc (test for excess errors)
UNRESOLVED: 22_locale/conversions/string/3.cc compilation failed to 
produce executable

FAIL: experimental/string_view/literals/types.cc (test for excess errors)
FAIL: experimental/string_view/literals/values.cc (test for excess 
errors)
UNRESOLVED: experimental/string_view/literals/values.cc compilation 
failed to produce executable


There would be similar errors running all the tests with -std=c++2a,
which is definitely something I do often and so want the tests to be
clean.

Absolutely, agreed.

We can either disable those tests when char8_t is enabled
(because we already have alternative tests checking the char8_t
versions of string_view etc.) or make them work either way, which the
attached patch begins doing (more changes are needed).
Since most of these tests exercise functionality that is not u8/char8_t 
specific, I think we should make them work.


I expect a different set of failures for -fno-char8_t (which is
probably a less important case to support that enabling char8_t in
older standards, but maybe still worth testing now and then).

I'm not sure it is less important.  -fno-char8_t may be an important 
tool for some code bases during their initial testing of, and migration 
to, C++20.


Tom.



Re: [PATCH] Fix up pre_and_rev_post_order_compute_fn

2019-02-07 Thread Richard Biener
On February 7, 2019 11:37:29 PM GMT+01:00, Jakub Jelinek  
wrote:
>Hi!
>
>I've noticed pre_and_rev_post_order_compute_fn is one of (apparently
>many)
>functions that take a struct function * argument and accept any
>function, as
>long as it is cfun.  For a patch I've been working on I actually need
>it
>to handle other functions as well and in this case it is trivial to
>fix.
>I've seen several others e.g. in graph.c, though some of those are
>harder to
>deal with, as some there are calls to functions that don't have a
>variant
>with struct function * argument and assume cfun implicitly.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

EH... 

OK. 
Richard. 

>2019-02-07  Jakub Jelinek  
>
>   * cfganal.c (pre_and_rev_post_order_compute_fn): Use fn instead of
>   cfun everywhere.
>
>--- gcc/cfganal.c.jj   2019-01-01 12:37:17.915962514 +0100
>+++ gcc/cfganal.c  2019-02-07 20:47:29.035520143 +0100
>@@ -951,10 +951,10 @@ pre_and_rev_post_order_compute_fn (struc
>  bool include_entry_exit)
> {
>   int pre_order_num = 0;
>-  int rev_post_order_num = n_basic_blocks_for_fn (cfun) - 1;
>+  int rev_post_order_num = n_basic_blocks_for_fn (fn) - 1;
> 
>   /* Allocate stack for back-tracking up CFG.  */
>-  auto_vec stack (n_basic_blocks_for_fn (cfun) +
>1);
>+  auto_vec stack (n_basic_blocks_for_fn (fn) + 1);
> 
>   if (include_entry_exit)
> {
>@@ -968,7 +968,7 @@ pre_and_rev_post_order_compute_fn (struc
> rev_post_order_num -= NUM_FIXED_BLOCKS;
> 
>   /* Allocate bitmap to track nodes that have been visited.  */
>-  auto_sbitmap visited (last_basic_block_for_fn (cfun));
>+  auto_sbitmap visited (last_basic_block_for_fn (fn));
> 
>   /* None of the nodes in the CFG have been visited yet.  */
>   bitmap_clear (visited);
>
>   Jakub



Re: [C++PATCH] [PR87322] move cp_evaluated up to tsubst all lambda parms

2019-02-07 Thread Alexandre Oliva
On Feb  7, 2019, Jason Merrill  wrote:

>> + PR c++/86322.  */

> Wrong PR number.

Thanks

>> +  if (local_specializations)
>> +if (tree r = retrieve_local_specialization (t))
>> +  return r;

> Hmm, I would expect this to do the wrong thing for pack expansion of a
> lambda, giving us only one rather than one per element of the
> expansion.

Oooh.  Indeed, I hadn't realized this was possible.

I think we should separate local_specializations arising from different
pack expansion bindings, considering they won't always appear in tsubst
args:


diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 0177d7f77891..9f954698f454 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -11698,6 +11698,10 @@ gen_elem_of_pack_expansion_instantiation (tree pattern,
   ARGUMENT_PACK_SELECT_INDEX (aps) = index;
 }
 
+  // Any local specialization bindings arising from this substitution
+  // cannot be reused for a different INDEX.
+  local_specialization_stack lss (lss_copy);
+
   /* Substitute into the PATTERN with the (possibly altered)
  arguments.  */
   if (pattern == in_decl)


> For instance, in lambda-variadic5.C we should get three
> instantiations of the "print" function template; can you add that
> check to the test?

This causes lambda-variadic5.C to get 6 separate lambdas, instead of 2,
so it passes after the following patchlet:

diff --git a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-variadic5.C 
b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-variadic5.C
index 97f64cd761a7..1f931757b72f 100644
--- a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-variadic5.C
+++ b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-variadic5.C
@@ -1,5 +1,7 @@
 // PR c++/47226
 // { dg-do compile { target c++11 } }
+// { dg-options "-fdump-tree-original" }
+// { dg-final { scan-tree-dump-times ":: \\(null\\)" 6 
"original" } }
 
 template
 void print(const T&) {}


Now, should I adjust lambda-variadic5.C to check for the presence of the
expencted count of lambdas in compiler dumps?  Or should I go for a new
test?

Thinking of how to test for it drew my attention to issues about lambda
naming, that may have implications on ABI and duplicate code removal.
There are two issues I'm concerned with:

- we increment lambda_cnt when introducing a lambda within a generic,
  and then increment it again as we specialize, partially or fully, each
  lambda expr/object/type set.  This makes for gaps in numbering of full
  specializations and, being a global counter, does not ensure the
  assignment of the same symbol name to corresponding instantiations in
  separate translation units

- we don't seem to attempt to reuse lambdas across different
  specializations of argument pack expansions.  I guess that's ok, we
  don't attempt to reuse them across different specializations of the
  enclosing template function or class either, but, unlike enclosing
  contexts, different argument pack indices do not mandate different
  mangled names.  This came up because I was thinking of having a
  sub-map in local_specializations, so that the lambda would map to
  another map in which we could look up template arguments to find a
  specific instance.  That wouldn't work, in part because args does not
  provide the entire set of substitutions, and in another part because
  we don't seem to be able to identify, in patterns undergoing
  substitution but before actually performing the substitution, the set
  of (implied?) template arguments that might affect the substitution
  result.  To make this concrete: in

[&t]{return([]{return(0);}()+t);}...

  we might be able to reuse the nested lambda across expansions of the
  enclosing lambda.  except that, being nested, their contexts wouldn't
  allow that.  but what if they were siblings, say:

([&t]{return(t);}()+[]{return(0);}())...
  
  here the latter is in no way affected by the t pack, so we could
  create a single lambda, reusing it for all indices in t.  The patchlet
  above will stand in the way of it, but IIUC it doesn't matter, we're
  not supposed/expected to do that, even when the decision on whether or
  not to reuse might affect symbol naming, e.g. if it appears in an
  initializer of a data member.


Here's the patch I'm testing.  OK if it passes?


[PR87322] move cp_evaluated up to tsubst all lambda parms

From: Alexandre Oliva 

A lambda capture variable initialized with a lambda expr taking more
than one parameter got us confused.

The first problem was that the parameter list was cut short during
tsubsting because we tsubsted it with cp_unevaluated_operand.  We
reset it right after, to tsubst the function body, so I've moved the
reset up so that it's in effect while processing the parameters as
well.

The second problem was that the lambda expr appeared twice, once in a
decltype that gave the capture variable its type, and once in its
initializer.  This caused us to instantiate two separate lambda exprs
and closure types, and then to flag that the lambda expr in the
initializer could not be conver

Re: [C++PATCH] [PR86379] do not use TREE_TYPE for USING_DECL_SCOPE

2019-02-07 Thread Alexandre Oliva
On Feb  7, 2019, Jason Merrill  wrote:

> OK, that makes sense; it isn't always clear what the right handling of
> a USING_DECL is.

Indeed.  Like, in shared_member_p, I'm wondering if we shouldn't recurse
if the USING_DECL maps to an overload, that IIUC might contain static
and non-static functions, other using decls, and maybe even types.

> In protected_accessible_p and shared_member_p, if we're left with a
> USING_DECL after strip_using_decl, we can't give a meaningful answer,
> and should probably abort; we shouldn't get here with a dependent
> expression.

Ah, that's good knowledge to have.  I've added asserts and comments.


Here's what I'm testing.


[PR86379] do not use TREE_TYPE for USING_DECL_SCOPE

From: Alexandre Oliva 

It's too risk to reuse the type field for USING_DECL_SCOPE.
Language-independent parts of the compiler, such as location and
non-lvalue wrappers, happily take the TREE_TYPE of a USING_DECL as if
it was a type rather than an unrelated scope.

For better or worse, USING_DECLs use the non-common struct so we can
use the otherwise unused result field.  Adjust fallout, from uses of
TREE_TYPE that were supposed to be USING_DECL_SCOPE, to other
accidental uses of TREE_TYPE of a USING_DECL.


for  gcc/cp/ChangeLog

PR c++/86379
* cp-tree.h (USING_DECL_SCOPE): Use result rather than type.
* name-lookup.c (strip_using_decl): Use USING_DECL_SCOPE.
* search.c (protected_accessible_p): Follow USING_DECL_DECLS.
(shared_member_p): Likewise.
(lookup_member): Likewise.
* decl.c (grok_special_member_properties): Skip USING_DECLs.
* semantics.c (finish_omp_declare_simd_methods): Likewise.

for  gcc/testsuite/ChangeLog

PR c++/86379
* g++.dg/cpp0x/pr86379.C: New.
---
 gcc/cp/cp-tree.h |2 
 gcc/cp/decl.c|3 
 gcc/cp/name-lookup.c |2 
 gcc/cp/search.c  |   19 +++
 gcc/cp/semantics.c   |3 
 gcc/testsuite/g++.dg/cpp0x/pr86379.C |  207 ++
 6 files changed, 229 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/pr86379.C

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index dada3a6aa410..44a3620a539f 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -3293,7 +3293,7 @@ struct GTY(()) lang_decl {
 #define DECL_DEPENDENT_P(NODE) DECL_LANG_FLAG_0 (USING_DECL_CHECK (NODE))
 
 /* The scope named in a using decl.  */
-#define USING_DECL_SCOPE(NODE) TREE_TYPE (USING_DECL_CHECK (NODE))
+#define USING_DECL_SCOPE(NODE) DECL_RESULT_FLD (USING_DECL_CHECK (NODE))
 
 /* The decls named by a using decl.  */
 #define USING_DECL_DECLS(NODE) DECL_INITIAL (USING_DECL_CHECK (NODE))
diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index 65ba812deb67..373b79b844dc 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -13297,7 +13297,8 @@ grok_special_member_properties (tree decl)
 {
   tree class_type;
 
-  if (!DECL_NONSTATIC_MEMBER_FUNCTION_P (decl))
+  if (TREE_CODE (decl) == USING_DECL
+  || !DECL_NONSTATIC_MEMBER_FUNCTION_P (decl))
 return;
 
   class_type = DECL_CONTEXT (decl);
diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c
index d7b9029b0a3a..959f43b02384 100644
--- a/gcc/cp/name-lookup.c
+++ b/gcc/cp/name-lookup.c
@@ -2100,7 +2100,7 @@ strip_using_decl (tree decl)
 
 using typename :: [opt] nested-name-specifier unqualified-id ;
   */
-  decl = make_typename_type (TREE_TYPE (decl),
+  decl = make_typename_type (USING_DECL_SCOPE (decl),
 DECL_NAME (decl),
 typename_type, tf_error);
   if (decl != error_mark_node)
diff --git a/gcc/cp/search.c b/gcc/cp/search.c
index 0367e4952138..4c3fffda717c 100644
--- a/gcc/cp/search.c
+++ b/gcc/cp/search.c
@@ -623,6 +623,11 @@ protected_accessible_p (tree decl, tree derived, tree 
type, tree otype)
   if (!DERIVED_FROM_P (type, derived))
 return 0;
 
+  /* DECL_NONSTATIC_MEMBER_P won't work for USING_DECLs.  */
+  decl = strip_using_decl (decl);
+  /* We don't expect or support dependent decls.  */
+  gcc_assert (TREE_CODE (decl) != USING_DECL);
+
   /* [class.protected]
 
  When a friend or a member function of a derived class references
@@ -928,8 +933,13 @@ shared_member_p (tree t)
   if (is_overloaded_fn (t))
 {
   for (ovl_iterator iter (get_fns (t)); iter; ++iter)
-   if (DECL_NONSTATIC_MEMBER_FUNCTION_P (*iter))
- return 0;
+   {
+ tree decl = strip_using_decl (*iter);
+ /* We don't expect or support dependent decls.  */
+ gcc_assert (TREE_CODE (decl) != USING_DECL);
+ if (DECL_NONSTATIC_MEMBER_FUNCTION_P (decl))
+   return 0;
+   }
   return 1;
 }
   return 0;
@@ -1177,7 +1187,10 @@ lookup_member (tree xbasetype, tree name, int protect, 
bool want_type,
   && !really_overloaded_fn (rval))
 {
   tree decl = is_overloaded_fn 

[WIP PATCH] Improve tail call analysis and inliner EH clobber through variable life analysis (PR tree-optimization/89060)

2019-02-07 Thread Jakub Jelinek
Hi!

The following patch uses a simple data flow to find live (addressable)
variables at certain spots (for tail call discovery at the point of the
potential tail call, so that we don't reject tail calls because of variables
that can be known out of scope already so that people don't have to find
ugly workarounds if they really need tail calls, and for the recently
added inline EH pad clobber addition so that we don't add too many
variables).  Bootstrapped/regtested on x86_64-linux and i686-linux.

Haven't gathered statistics on how often does it trigger yet.
Shall I use double queue (pending/working sbitmaps) to speed it up (guess I
could gather statistics if that helps reasonably)?  But if so, perhaps
add_scope_conflicts should change too.  I haven't shared code with
what add_scope_conflicts does because it isn't really that big chunk of code
and would need many modifications to make it generic enough to handle
add_scope_conflicts needs, plus as I wanted to make it a helper for other
optimizations, I didn't want to use bb->aux etc.  For tail call, I see
another option to compute it unconditionally once at the start of the pass
for all may_be_aliased auto_var_in_fn_p vars and then just walk a single
bb with each (potential) tail call, instead of computing it for every
potential tail call again (on the other side with perhaps smaller set of
variables).  In that case e.g. vars == NULL argument could imply the
VAR_P && may_be_aliased && auto_var_in_fn_p test instead of bitmap_bit_p
test, we could drop the stop_after argument and instead export the _1
function renamed to something to walk a single bb (or wrapper to it that
would set up the structure) until stop_after.

Thoughts on this?

2019-02-08  Jakub Jelinek  

PR tree-optimization/89060
* tree-ssa-live.h (compute_live_vars, destroy_live_vars): Declare.
* tree-ssa-live.c: Include gimple-walk.h and cfganal.h.
(struct compute_live_vars_data): New type.
(compute_live_vars_visit, compute_live_vars_1, compute_live_vars,
destroy_live_vars): New functions.
* tree-tailcall.c (find_tail_calls): Perform variable life analysis
before punting.
* tree-inline.h (struct copy_body_data): Add eh_landing_pad_dest
member.
* tree-inline.c (add_clobbers_to_eh_landing_pad): Remove BB argument.
Perform variable life analysis to select variables that really need
clobbers added.
(copy_edges_for_bb): Don't call add_clobbers_to_eh_landing_pad here,
instead set id->eh_landing_pad_dest and assert it is the same.
(copy_cfg_body): Call it here if id->eh_landing_pad_dest is non-NULL.

* gcc.dg/tree-ssa/pr89060.c: New test.

--- gcc/tree-ssa-live.h.jj  2019-01-01 12:37:16.967978068 +0100
+++ gcc/tree-ssa-live.h 2019-02-07 19:02:58.233530924 +0100
@@ -265,6 +265,8 @@ extern tree_live_info_p calculate_live_r
 extern void debug (tree_live_info_d &ref);
 extern void debug (tree_live_info_d *ptr);
 extern void dump_live_info (FILE *, tree_live_info_p, int);
+extern vec compute_live_vars (struct function *, bitmap, gimple 
*);
+extern void destroy_live_vars (vec &);
 
 
 /*  Return TRUE if P is marked as a global in LIVE.  */
--- gcc/tree-ssa-live.c.jj  2019-01-01 12:37:16.055993032 +0100
+++ gcc/tree-ssa-live.c 2019-02-07 19:34:33.046401259 +0100
@@ -41,6 +41,8 @@ along with GCC; see the file COPYING3.
 #include "stringpool.h"
 #include "attribs.h"
 #include "optinfo.h"
+#include "gimple-walk.h"
+#include "cfganal.h"
 
 static void verify_live_on_entry (tree_live_info_p);
 
@@ -1194,8 +1196,142 @@ calculate_live_ranges (var_map map, bool
 
   return live;
 }
+
+/* Data structure for compute_live_vars* functions.  */
 
+struct compute_live_vars_data {
+  /* Vector of bitmaps for live vars at the end of basic blocks,
+ indexed by bb->index.  ACTIVE[ENTRY_BLOCK] must be empty bitmap,
+ ACTIVE[EXIT_BLOCK] is used for STOP_AFTER.  */
+  vec active;
+  /* Work bitmap of currently live variables.  */
+  bitmap work;
+  /* Bitmap of interesting variables.  Variables with uids not in this
+ bitmap are not tracked.  */
+  bitmap vars;
+};
 
+/* Callback for walk_stmt_load_store_addr_ops.  If OP is a VAR_DECL with
+   uid set in DATA->vars, enter its uid into bitmap DATA->work.  */
+
+static bool
+compute_live_vars_visit (gimple *, tree op, tree, void *pdata)
+{
+  compute_live_vars_data *data = (compute_live_vars_data *) pdata;
+  op = get_base_address (op);
+  if (op && VAR_P (op) && bitmap_bit_p (data->vars, DECL_UID (op)))
+bitmap_set_bit (data->work, DECL_UID (op));
+  return false;
+}
+
+/* Helper routine for compute_live_vars, calculating the sets of live
+   variables at the end of BB, leaving the result in DATA->work.
+   If STOP_AFTER is non-NULL, stop processing after that stmt.  */
+
+static void
+compute_live_vars_1 (basic_block bb, compute_live_vars_data *data,
+gimple *stop_after)
+{
+  edge e;
+  edge_iterato

Re: Xfail gfortran.dg/pr79966.f90

2019-02-07 Thread Richard Biener
On Thu, 7 Feb 2019, Rainer Orth wrote:

> Hi Honza,
> 
> > PR ipa/88711
> > * gfortran.dg/pr79966.f90: Xfail for time being.
> > Index: ../../gcc/testsuite/gfortran.dg/pr79966.f90
> > ===
> > --- ../../gcc/testsuite/gfortran.dg/pr79966.f90 (revision 268579)
> > +++ ../../gcc/testsuite/gfortran.dg/pr79966.f90 (working copy)
> > @@ -108,5 +108,5 @@ contains
> >  
> >call RunTPTests()
> >end program
> > -
> > -! { dg-final { scan-ipa-dump "Inlined tp_sum/\[0-9\]+ into 
> > runtptests/\[0-9\]+" "inline" } }
> > +! See PR88711. Inliner is currently not able to figure out that inlining 
> > tp_sum is a good idea.
> > +! { dg-final { scan-ipa-dump "Inlined tp_sum/\[0-9\]+ into 
> > runtptests/\[0-9\]+" "inline" { xfail spu-*-* } } }
> 
> why restrict the xfail to spu only?  According to gcc-testresults, the
> test currently fails everywhere (aarch64, arm*, hppa*, i?86, ia64, m68k,
> powerpc*, riscv64, sparc*, x86_64).

OK if you xfail unconditionally on *-*-*

Richard.