[PATCH] libstdc++: Implement LWG 3150 for std::uniform_random_bit_generator

2020-02-15 Thread Jonathan Wakely
* include/bits/random.h (uniform_random_bit_generator): Require min()
and max() to be constant expressions and min() to be less than max().
* testsuite/26_numerics/random/concept.cc: Check additional cases.
* testsuite/26_numerics/random/pr60037-neg.cc: Adjust dg-error lineno.

Tested powerpc64le-linux, committed to master.


commit 5b1d588509551291f4028497858ee9e04ce0bdee
Author: Jonathan Wakely 
Date:   Sat Feb 15 08:58:43 2020 +0000

libstdc++: Implement LWG 3150 for std::uniform_random_bit_generator

* include/bits/random.h (uniform_random_bit_generator): Require 
min()
and max() to be constant expressions and min() to be less than 
max().
* testsuite/26_numerics/random/concept.cc: Check additional cases.
* testsuite/26_numerics/random/pr60037-neg.cc: Adjust dg-error 
lineno.

diff --git a/libstdc++-v3/include/bits/random.h 
b/libstdc++-v3/include/bits/random.h
index 3eefdefc96f..d4aebf45af0 100644
--- a/libstdc++-v3/include/bits/random.h
+++ b/libstdc++-v3/include/bits/random.h
@@ -60,6 +60,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   {
{ _Gen::min() } -> same_as<invoke_result_t<_Gen&>>;
{ _Gen::max() } -> same_as<invoke_result_t<_Gen&>>;
+   requires bool_constant<(_Gen::min() < _Gen::max())>::value;
   };
 #endif
 
diff --git a/libstdc++-v3/testsuite/26_numerics/random/concept.cc 
b/libstdc++-v3/testsuite/26_numerics/random/concept.cc
index 69a7fc1e674..cb1ea882e16 100644
--- a/libstdc++-v3/testsuite/26_numerics/random/concept.cc
+++ b/libstdc++-v3/testsuite/26_numerics/random/concept.cc
@@ -219,3 +219,30 @@ struct N11
 };
 
 static_assert( ! std::uniform_random_bit_generator<N11> );
+
+struct N12
+{
+  unsigned operator()();
+  static unsigned min() { return 0; } // not constexpr
+  static constexpr unsigned max() { return 1; }
+};
+
+static_assert( ! std::uniform_random_bit_generator<N12> ); // LWG 3150
+
+struct N13
+{
+  unsigned operator()();
+  static constexpr unsigned min() { return 0; }
+  static unsigned max() { return 1; } // not constexpr
+};
+
+static_assert( ! std::uniform_random_bit_generator<N13> ); // LWG 3150
+
+struct N14
+{
+  unsigned operator()();
+  static constexpr unsigned min() { return 1; }
+  static constexpr unsigned max() { return 0; } // max not greater than min
+};
+
+static_assert( ! std::uniform_random_bit_generator<N14> ); // LWG 3150
diff --git a/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc 
b/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc
index 201b87e9c52..91e5566c54a 100644
--- a/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc
+++ b/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc
@@ -10,6 +10,6 @@ std::__detail::_Adaptor<std::mt19937, unsigned long> 
aurng(urng);
 auto x = std::generate_canonical<std::size_t,
 			std::numeric_limits<std::size_t>::digits>(urng);
 
-// { dg-error "static assertion failed: template argument must be a floating 
point type" "" { target *-*-* } 171 }
+// { dg-error "static assertion failed: template argument must be a floating 
point type" "" { target *-*-* } 172 }
 
 // { dg-error "static assertion failed: template argument must be a floating 
point type" "" { target *-*-* } 3281 }


[PATCH] libstdc++: Update __cpp_lib_erase_if macro (P1115R3)

2020-02-15 Thread Jonathan Wakely
Now that this feature has been approved for C++20 we can define the
macro to the official value.

* include/bits/erase_if.h (__cpp_lib_erase_if): Define to 202002L.
* include/std/deque: Likewise.
* include/std/forward_list: Likewise.
* include/std/list: Likewise.
* include/std/string: Likewise.
* include/std/vector: Likewise.
* include/std/version: Likewise.
* testsuite/23_containers/deque/erasure.cc: Test for new value.
* testsuite/23_containers/forward_list/erasure.cc: Likewise.
* testsuite/23_containers/list/erasure.cc: Likewise.
* testsuite/23_containers/map/erasure.cc: Likewise.
* testsuite/23_containers/set/erasure.cc: Likewise.
* testsuite/23_containers/unordered_map/erasure.cc: Likewise.
* testsuite/23_containers/unordered_set/erasure.cc: Likewise.
* testsuite/23_containers/vector/erasure.cc: Likewise.

Tested powerpc64le-linux, committed to master.


commit 55b00d14f4daf671b865550c119dafdeb3139672
Author: Jonathan Wakely 
Date:   Sat Feb 15 09:02:30 2020 +0000

libstdc++: Update __cpp_lib_erase_if macro (P1115R3)

Now that this feature has been approved for C++20 we can define the
macro to the official value.

* include/bits/erase_if.h (__cpp_lib_erase_if): Define to 202002L.
* include/std/deque: Likewise.
* include/std/forward_list: Likewise.
* include/std/list: Likewise.
* include/std/string: Likewise.
* include/std/vector: Likewise.
* include/std/version: Likewise.
* testsuite/23_containers/deque/erasure.cc: Test for new value.
* testsuite/23_containers/forward_list/erasure.cc: Likewise.
* testsuite/23_containers/list/erasure.cc: Likewise.
* testsuite/23_containers/map/erasure.cc: Likewise.
* testsuite/23_containers/set/erasure.cc: Likewise.
* testsuite/23_containers/unordered_map/erasure.cc: Likewise.
* testsuite/23_containers/unordered_set/erasure.cc: Likewise.
* testsuite/23_containers/vector/erasure.cc: Likewise.

diff --git a/libstdc++-v3/include/bits/erase_if.h 
b/libstdc++-v3/include/bits/erase_if.h
index 2641fe63ba3..dc5e6ae584f 100644
--- a/libstdc++-v3/include/bits/erase_if.h
+++ b/libstdc++-v3/include/bits/erase_if.h
@@ -39,7 +39,7 @@ namespace std
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
 #if __cplusplus > 201703L
-# define __cpp_lib_erase_if 201900L
+# define __cpp_lib_erase_if 202002L
 #endif
 
   namespace __detail
diff --git a/libstdc++-v3/include/std/deque b/libstdc++-v3/include/std/deque
index efe04dbbb7c..07e2c69d02d 100644
--- a/libstdc++-v3/include/std/deque
+++ b/libstdc++-v3/include/std/deque
@@ -91,7 +91,7 @@ namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
-#define __cpp_lib_erase_if 201900L
+#define __cpp_lib_erase_if 202002L
 
   template<typename _Tp, typename _Alloc, typename _Predicate>
 inline typename deque<_Tp, _Alloc>::size_type
diff --git a/libstdc++-v3/include/std/forward_list 
b/libstdc++-v3/include/std/forward_list
index 8eb1ad150a7..439ef6b380c 100644
--- a/libstdc++-v3/include/std/forward_list
+++ b/libstdc++-v3/include/std/forward_list
@@ -62,7 +62,7 @@ namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
-#define __cpp_lib_erase_if 201900L
+#define __cpp_lib_erase_if 202002L
 
   template<typename _Tp, typename _Alloc, typename _Predicate>
 inline typename forward_list<_Tp, _Alloc>::size_type 
diff --git a/libstdc++-v3/include/std/list b/libstdc++-v3/include/std/list
index 1fb51a5abb4..7b3d3c2e888 100644
--- a/libstdc++-v3/include/std/list
+++ b/libstdc++-v3/include/std/list
@@ -86,7 +86,7 @@ namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
-#define __cpp_lib_erase_if 201900L
+#define __cpp_lib_erase_if 202002L
 
   template<typename _Tp, typename _Alloc, typename _Predicate>
 inline typename list<_Tp, _Alloc>::size_type
diff --git a/libstdc++-v3/include/std/string b/libstdc++-v3/include/std/string
index 6a3b61f8622..a95ef4dd541 100644
--- a/libstdc++-v3/include/std/string
+++ b/libstdc++-v3/include/std/string
@@ -118,7 +118,7 @@ namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
-#define __cpp_lib_erase_if 201900L
+#define __cpp_lib_erase_if 202002L
 
   template<typename _CharT, typename _Traits, typename _Alloc,
 	   typename _Predicate>
diff --git a/libstdc++-v3/include/std/vector b/libstdc++-v3/include/std/vector
index 2a7b4c0f629..6f587a82a09 100644
--- a/libstdc++-v3/include/std/vector
+++ b/libstdc++-v3/include/std/vector
@@ -101,7 +101,7 @@ namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
-#define __cpp_lib_erase_if 201900L
+#define __cpp_lib_erase_if 202002L
 
   template<typename _Tp, typename _Alloc, typename _Predicate>
 inline typename vector<_Tp, _Alloc>::size_type
diff --git a/libstdc++-v3/include/std/version b/libstdc++-v3/include/std/version
index d8a97767453..8a4affa60a0 100644
--- a/libstdc++-v3/include/std/version
+++ b/libstdc++-v3/include/std/version
@@ -184,7 +184,7 @@
 #define __cpp_lib_constexpr_complex 201711L
 #define __cpp_l

Re: [PATCH] match.pd: Disallow side-effects in GENERIC for non-COND_EXPR to COND_EXPR simplifications [PR93744]

2020-02-15 Thread Richard Biener
On February 15, 2020 7:09:51 AM GMT+01:00, Jakub Jelinek  
wrote:
>Hi!
>
>As the following testcases show (the first one reported, last two
>found by code inspection), we need to disallow side-effects
>in simplifications that turn some unconditional expression into
>conditional
>one.  From my little understanding of genmatch.c, it is able to
>automatically disallow side effects if the same operand is used
>multiple
>times in the match pattern, maybe if it is used multiple times in the
>replacement pattern, and if it is used in conditional contexts in the
>match
>pattern, could it be taught to handle this case too?  If yes, perhaps
>just the first hunk could be usable for 8/9 backports (+ the
>testcases).

It could possibly be done but then it's only three cases so far. 

OK. 

Richard. 

>Bootstrapped/regtested on x86_64-linux and i686-linux.
>
>2020-02-15  Jakub Jelinek  
>
>   PR tree-optimization/93744
>   * match.pd (((m1 >/=/<= m2) * d -> (m1 >/=/<= m2) ? d : 0,
>   A - ((A - B) & -(C cmp D)) -> (C cmp D) ? B : A,
>   A + ((B - A) & -(C cmp D)) -> (C cmp D) ? B : A): For GENERIC, make
>   sure @2 in the first and @1 in the other patterns has no side-effects.
>
>   * gcc.c-torture/execute/pr93744-1.c: New test.
>   * gcc.c-torture/execute/pr93744-2.c: New test.
>   * gcc.c-torture/execute/pr93744-3.c: New test.
>
>--- gcc/match.pd.jj2020-02-05 11:12:33.679383217 +0100
>+++ gcc/match.pd   2020-02-14 22:49:22.858771394 +0100
>@@ -1472,7 +1472,8 @@ (define_operator_list COND_TERNARY
> (for cmp (gt lt ge le)
> (simplify
>  (mult (convert (cmp @0 @1)) @2)
>-  (cond (cmp @0 @1) @2 { build_zero_cst (type); })))
>+  (if (GIMPLE || !TREE_SIDE_EFFECTS (@2))
>+   (cond (cmp @0 @1) @2 { build_zero_cst (type); }))))
> 
> /* For integral types with undefined overflow and C != 0 fold
>x * C EQ/NE y * C into x EQ/NE y.  */
>@@ -2709,7 +2710,8 @@ (define_operator_list COND_TERNARY
>&& TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
>&& INTEGRAL_TYPE_P (TREE_TYPE (@5))
>&& (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
>- || !TYPE_UNSIGNED (TREE_TYPE (@4))))
>+ || !TYPE_UNSIGNED (TREE_TYPE (@4)))
>+   && (GIMPLE || !TREE_SIDE_EFFECTS (@1)))
>(cond (cmp @2 @3) @1 @0)))
>  (simplify
>   (plus:c @0 (bit_and:c (minus @1 @0)
>@@ -2719,7 +2721,8 @@ (define_operator_list COND_TERNARY
>&& TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
>&& INTEGRAL_TYPE_P (TREE_TYPE (@5))
>&& (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
>- || !TYPE_UNSIGNED (TREE_TYPE (@4))))
>+ || !TYPE_UNSIGNED (TREE_TYPE (@4)))
>+   && (GIMPLE || !TREE_SIDE_EFFECTS (@1)))
>(cond (cmp @2 @3) @1 @0))))
> 
> /* Simplifications of shift and rotates.  */
>--- gcc/testsuite/gcc.c-torture/execute/pr93744-1.c.jj 2020-02-14
>22:50:58.993346192 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr93744-1.c2020-02-14
>22:49:57.934251395 +0100
>@@ -0,0 +1,14 @@
>+/* PR tree-optimization/93744 */
>+
>+typedef int I;
>+
>+int
>+main ()
>+{
>+  int a = 0;
>+  I b = 0;
>+  (a > 0) * (b |= 2);
>+  if (b != 2)
>+__builtin_abort ();
>+  return 0;
>+}
>--- gcc/testsuite/gcc.c-torture/execute/pr93744-2.c.jj 2020-02-14
>22:51:01.100314955 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr93744-2.c2020-02-14
>22:50:18.299949478 +0100
>@@ -0,0 +1,21 @@
>+/* PR tree-optimization/93744 */
>+
>+int w;
>+
>+int
>+foo (int x, int y, int z)
>+{
>+  int r = z - ((z - w++) & -(x < y));
>+  return r;
>+}
>+
>+int
>+main ()
>+{
>+  w = 4;
>+  if (foo (5, 7, 12) != 4 || w != 5)
>+__builtin_abort ();
>+  if (foo (7, 5, 12) != 12 || w != 6)
>+__builtin_abort ();
>+  return 0;
>+}
>--- gcc/testsuite/gcc.c-torture/execute/pr93744-3.c.jj 2020-02-14
>22:51:03.415280636 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr93744-3.c2020-02-14
>22:50:25.820837971 +0100
>@@ -0,0 +1,21 @@
>+/* PR tree-optimization/93744 */
>+
>+int w;
>+
>+int
>+foo (int x, int y, int z)
>+{
>+  int r = z + ((w++ - z) & -(x < y));
>+  return r;
>+}
>+
>+int
>+main ()
>+{
>+  w = 4;
>+  if (foo (5, 7, 12) != 4 || w != 5)
>+__builtin_abort ();
>+  if (foo (7, 5, 12) != 12 || w != 6)
>+__builtin_abort ();
>+  return 0;
>+}
>
>   Jakub



Re: [committed] c++: Fix constexpr if and braced functional cast.

2020-02-15 Thread Jason Merrill

On 2/13/20 12:42 AM, Jason Merrill wrote:

While partially instantiating a generic lambda, we can encounter pack
expansions or constexpr if where we can't actually do the substitution
immediately, and instead remember a partial instantiation context
in *_EXTRA_ARGS.  This includes any local_specializations used in the
pattern or condition.  In this testcase our tree walk wasn't finding the use
of i because we weren't walking into the type of a CONSTRUCTOR.  Fixed by
moving the code for doing that from find_parameter_packs_r into
cp_walk_subtrees.

Tested x86_64-pc-linux-gnu, applying to trunk.

2020-02-11  Jason Merrill  

PR c++/92583
PR c++/92654
* tree.c (cp_walk_subtrees): Walk CONSTRUCTOR types here.
* pt.c (find_parameter_packs_r): Not here.


Another place that is redundant with the code in cp_walk_subtrees:
commit ce23347267daba090f619d61c6a7a749ea5dbeab
Author: Jason Merrill 
Date:   Fri Feb 14 11:37:26 2020 +0100

c++: Remove more dead code.

gcc/cp/ChangeLog
2020-02-14  Jason Merrill  

PR c++/92583
* pt.c (any_template_parm_r): Remove CONSTRUCTOR handling.

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 6e7f4555da8..d19bde7bcbe 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -10448,11 +10448,6 @@ any_template_parm_r (tree t, void *data)
   WALK_SUBTREE (TREE_OPERAND (t, 1));
   break;
 
-case CONSTRUCTOR:
-  if (TREE_TYPE (t))
-WALK_SUBTREE (TREE_TYPE (t));
-  break;
-
 case PARM_DECL:
   /* A parameter or constraint variable may also depend on a template
 	 parameter without explicitly naming it.  */


[committed] c++: Fix lambda in atomic constraint [PR92556]

2020-02-15 Thread Jason Merrill
find_template_parameters needs to find the mention of T in the lambda.
Fixing that leaves this as a hard error, which may be surprising but is
consistent with lambdas in other SFINAE contexts like template argument
deduction.

Tested x86_64-pc-linux-gnu, applying to trunk.

gcc/cp/ChangeLog
2020-02-15  Jason Merrill  

PR c++/92556
* pt.c (any_template_parm_r): Look into lambda body.
---
 gcc/cp/pt.c   |  9 +
 gcc/testsuite/g++.dg/cpp2a/concepts-lambda5.C | 10 ++
 2 files changed, 19 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-lambda5.C

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index d19bde7bcbe..6c9abb8f3d3 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -10479,6 +10479,15 @@ any_template_parm_r (tree t, void *data)
   }
   break;
 
+case LAMBDA_EXPR:
+  {
+   /* Look in the parms and body.  */
+   tree fn = lambda_function (t);
+   WALK_SUBTREE (TREE_TYPE (fn));
+   WALK_SUBTREE (DECL_SAVED_TREE (fn));
+  }
+  break;
+
 default:
   break;
 }
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-lambda5.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-lambda5.C
new file mode 100644
index 000..fe471899c14
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-lambda5.C
@@ -0,0 +1,10 @@
+// PR c++/92556
+// { dg-do compile { target c++2a } }
+
+// Having this as a hard error is consistent with template argument deduction;
+// it's an open core issue (jason 2020-02-14).
+template <class T> concept has_value
+  = requires { []{T::value;}; }; // { dg-error "" }
+template <has_value T> void f() { }
+template <class T> void f() { }
+void q() { f<int>(); }

base-commit: d71365427670a791c5b54bfec6e3d41210844a8a
-- 
2.18.1



[PATCH 02/10] i386: Use ix86_output_ssemov for XImode TYPE_SSEMOV

2020-02-15 Thread H.J. Lu
PR target/89229
* config/i386/i386.md (*movxi_internal_avx512f): Call
ix86_output_ssemov for TYPE_SSEMOV.
---
 gcc/config/i386/i386.md | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index f14683cd14f..b30e5a51edc 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1902,11 +1902,7 @@ (define_insn "*movxi_internal_avx512f"
   return standard_sse_constant_opcode (insn, operands);
 
 case TYPE_SSEMOV:
-  if (misaligned_operand (operands[0], XImode)
- || misaligned_operand (operands[1], XImode))
-   return "vmovdqu32\t{%1, %0|%0, %1}";
-  else
-   return "vmovdqa32\t{%1, %0|%0, %1}";
+  return ix86_output_ssemov (insn, operands);
 
 default:
   gcc_unreachable ();
-- 
2.24.1



[PATCH 06/10] i386: Use ix86_output_ssemov for SImode TYPE_SSEMOV

2020-02-15 Thread H.J. Lu
There is no need to set mode attribute to XImode since ix86_output_ssemov
can properly encode xmm16-xmm31 registers with and without AVX512VL.

gcc/

PR target/89229
* config/i386/i386.md (*movsi_internal): Call ix86_output_ssemov
for TYPE_SSEMOV.  Remove ext_sse_reg_operand and TARGET_AVX512VL
check.

gcc/testsuite/

PR target/89229
* gcc.target/i386/pr89229-4a.c: New test.
* gcc.target/i386/pr89229-4b.c: Likewise.
* gcc.target/i386/pr89229-4c.c: Likewise.
---
 gcc/config/i386/i386.md| 25 ++
 gcc/testsuite/gcc.target/i386/pr89229-4a.c | 17 +++
 gcc/testsuite/gcc.target/i386/pr89229-4b.c |  6 ++
 gcc/testsuite/gcc.target/i386/pr89229-4c.c |  7 ++
 4 files changed, 32 insertions(+), 23 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-4a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-4b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-4c.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 03d8078e957..05815c5cf3b 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2261,25 +2261,7 @@ (define_insn "*movsi_internal"
   gcc_unreachable ();
 
 case TYPE_SSEMOV:
-  switch (get_attr_mode (insn))
-   {
-   case MODE_SI:
-  return "%vmovd\t{%1, %0|%0, %1}";
-   case MODE_TI:
- return "%vmovdqa\t{%1, %0|%0, %1}";
-   case MODE_XI:
- return "vmovdqa32\t{%g1, %g0|%g0, %g1}";
-
-   case MODE_V4SF:
- return "%vmovaps\t{%1, %0|%0, %1}";
-
-   case MODE_SF:
- gcc_assert (!TARGET_AVX);
-  return "movss\t{%1, %0|%0, %1}";
-
-   default:
- gcc_unreachable ();
-   }
+  return ix86_output_ssemov (insn, operands);
 
 case TYPE_MMX:
   return "pxor\t%0, %0";
@@ -2345,10 +2327,7 @@ (define_insn "*movsi_internal"
  (cond [(eq_attr "alternative" "2,3")
  (const_string "DI")
(eq_attr "alternative" "8,9")
- (cond [(ior (match_operand 0 "ext_sse_reg_operand")
- (match_operand 1 "ext_sse_reg_operand"))
-  (const_string "XI")
-(match_test "TARGET_AVX")
+ (cond [(match_test "TARGET_AVX")
   (const_string "TI")
 (ior (not (match_test "TARGET_SSE2"))
  (match_test "optimize_function_for_size_p (cfun)"))
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-4a.c 
b/gcc/testsuite/gcc.target/i386/pr89229-4a.c
new file mode 100644
index 000..fd56f447016
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-4a.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+extern int i;
+
+int
+foo1 (void)
+{
+  register int xmm16 __asm ("xmm16") = i;
+  asm volatile ("" : "+v" (xmm16));
+  register int xmm17 __asm ("xmm17") = xmm16;
+  asm volatile ("" : "+v" (xmm17));
+  return xmm17;
+}
+
+/* { dg-final { scan-assembler-times 
"vmovdqa32\[^\n\r]*xmm1\[67]\[^\n\r]*xmm1\[67]" 1 } } */
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-4b.c 
b/gcc/testsuite/gcc.target/i386/pr89229-4b.c
new file mode 100644
index 000..023e81253a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-4b.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */
+
+#include "pr89229-4a.c"
+
+/* { dg-final { scan-assembler-times 
"vmovdqa32\[^\n\r]*zmm1\[67]\[^\n\r]*zmm1\[67]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-4c.c 
b/gcc/testsuite/gcc.target/i386/pr89229-4c.c
new file mode 100644
index 000..bb728082e96
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-4c.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */
+
+#include "pr89229-4a.c"
+
+/* { dg-final { scan-assembler-times 
"vmovdqa32\[^\n\r]*xmm1\[67]\[^\n\r]*xmm1\[67]" 1 } } */
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
-- 
2.24.1



[PATCH 03/10] i386: Use ix86_output_ssemov for OImode TYPE_SSEMOV

2020-02-15 Thread H.J. Lu
There is no need to set mode attribute to XImode since ix86_output_ssemov
can properly encode ymm16-ymm31 registers with and without AVX512VL.

PR target/89229
* config/i386/i386.md (*movoi_internal_avx): Call
ix86_output_ssemov for TYPE_SSEMOV.  Remove ext_sse_reg_operand
and TARGET_AVX512VL check.
---
 gcc/config/i386/i386.md | 26 ++
 1 file changed, 2 insertions(+), 24 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index b30e5a51edc..9e9b17d0913 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1925,21 +1925,7 @@ (define_insn "*movoi_internal_avx"
   return standard_sse_constant_opcode (insn, operands);
 
 case TYPE_SSEMOV:
-  if (misaligned_operand (operands[0], OImode)
- || misaligned_operand (operands[1], OImode))
-   {
- if (get_attr_mode (insn) == MODE_XI)
-   return "vmovdqu32\t{%1, %0|%0, %1}";
- else
-   return "vmovdqu\t{%1, %0|%0, %1}";
-   }
-  else
-   {
- if (get_attr_mode (insn) == MODE_XI)
-   return "vmovdqa32\t{%1, %0|%0, %1}";
- else
-   return "vmovdqa\t{%1, %0|%0, %1}";
-   }
+  return ix86_output_ssemov (insn, operands);
 
 default:
   gcc_unreachable ();
@@ -1948,15 +1934,7 @@ (define_insn "*movoi_internal_avx"
   [(set_attr "isa" "*,avx2,*,*")
(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
(set_attr "prefix" "vex")
-   (set (attr "mode")
-   (cond [(ior (match_operand 0 "ext_sse_reg_operand")
-   (match_operand 1 "ext_sse_reg_operand"))
-(const_string "XI")
-  (and (eq_attr "alternative" "1")
-   (match_test "TARGET_AVX512VL"))
-(const_string "XI")
- ]
- (const_string "OI")))])
+   (set_attr "mode" "OI")])
 
 (define_insn "*movti_internal"
   [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,v ,m,?r,?Yd")
-- 
2.24.1



[PATCH 00/10] i386: Properly encode xmm16-xmm31/ymm16-ymm31 for vector move

2020-02-15 Thread H.J. Lu
This patch set was originally submitted in Feb 2019:

https://gcc.gnu.org/ml/gcc-patches/2019-02/msg01841.html

I broke it into 10 smaller patches for easy review.

On x86, when AVX and AVX512 are enabled, vector move instructions can
be encoded with either 2-byte/3-byte VEX (AVX) or 4-byte EVEX (AVX512):

   0:   c5 f9 6f d1 vmovdqa %xmm1,%xmm2
   4:   62 f1 fd 08 6f d1   vmovdqa64 %xmm1,%xmm2

We prefer VEX encoding over EVEX since VEX is shorter.  Also AVX512F
only supports 512-bit vector moves.  AVX512F + AVX512VL supports 128-bit
and 256-bit vector moves.  Mode attributes on x86 vector move patterns
indicate target preferences of vector move encoding.  For vector register
to vector register move, we can use 512-bit vector move instructions to
move 128-bit/256-bit vector if AVX512VL isn't available.  With AVX512F
and AVX512VL, we should use VEX encoding for 128-bit/256-bit vector moves
if upper 16 vector registers aren't used.  This patch adds a function,
ix86_output_ssemov, to generate vector moves:

1. If zmm registers are used, use EVEX encoding.
2. If xmm16-xmm31/ymm16-ymm31 registers aren't used, SSE or VEX encoding
will be generated.
3. If xmm16-xmm31/ymm16-ymm31 registers are used:
   a. With AVX512VL, AVX512VL vector moves will be generated.
   b. Without AVX512VL, xmm16-xmm31/ymm16-ymm31 register to register
  move will be done with zmm register move.

Tested on AVX2 and AVX512 with and without --with-arch=native.

H.J. Lu (10):
  i386: Properly encode vector registers in vector move
  i386: Use ix86_output_ssemov for XImode TYPE_SSEMOV
  i386: Use ix86_output_ssemov for OImode TYPE_SSEMOV
  i386: Use ix86_output_ssemov for TImode TYPE_SSEMOV
  i386: Use ix86_output_ssemov for DImode TYPE_SSEMOV
  i386: Use ix86_output_ssemov for SImode TYPE_SSEMOV
  i386: Use ix86_output_ssemov for TFmode TYPE_SSEMOV
  i386: Use ix86_output_ssemov for DFmode TYPE_SSEMOV
  i386: Use ix86_output_ssemov for SFmode TYPE_SSEMOV
  i386: Use ix86_output_ssemov for MMX TYPE_SSEMOV

 gcc/config/i386/i386-protos.h |   2 +
 gcc/config/i386/i386.c| 274 ++
 gcc/config/i386/i386.md   | 212 +-
 gcc/config/i386/mmx.md|  29 +-
 gcc/config/i386/predicates.md |   5 -
 gcc/config/i386/sse.md|  98 +--
 .../gcc.target/i386/avx512vl-vmovdqa64-1.c|   7 +-
 gcc/testsuite/gcc.target/i386/pr89229-2a.c|  15 +
 gcc/testsuite/gcc.target/i386/pr89229-2b.c|  13 +
 gcc/testsuite/gcc.target/i386/pr89229-2c.c|   6 +
 gcc/testsuite/gcc.target/i386/pr89229-3a.c|  17 ++
 gcc/testsuite/gcc.target/i386/pr89229-3b.c|   6 +
 gcc/testsuite/gcc.target/i386/pr89229-3c.c|   7 +
 gcc/testsuite/gcc.target/i386/pr89229-4a.c|  17 ++
 gcc/testsuite/gcc.target/i386/pr89229-4b.c|   6 +
 gcc/testsuite/gcc.target/i386/pr89229-4c.c|   7 +
 gcc/testsuite/gcc.target/i386/pr89229-5a.c|  16 +
 gcc/testsuite/gcc.target/i386/pr89229-5b.c|  12 +
 gcc/testsuite/gcc.target/i386/pr89229-5c.c|   6 +
 gcc/testsuite/gcc.target/i386/pr89229-6a.c|  16 +
 gcc/testsuite/gcc.target/i386/pr89229-6b.c|   7 +
 gcc/testsuite/gcc.target/i386/pr89229-6c.c|   6 +
 gcc/testsuite/gcc.target/i386/pr89229-7a.c|  16 +
 gcc/testsuite/gcc.target/i386/pr89229-7b.c|   6 +
 gcc/testsuite/gcc.target/i386/pr89229-7c.c|   6 +
 gcc/testsuite/gcc.target/i386/pr89346.c   |  15 +
 26 files changed, 497 insertions(+), 330 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-2a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-2b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-2c.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-3a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-3b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-3c.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-4a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-4b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-4c.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-5a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-5b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-5c.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-6a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-6b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-6c.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-7a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-7b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-7c.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89346.c

-- 
2.24.1



[PATCH 04/10] i386: Use ix86_output_ssemov for TImode TYPE_SSEMOV

2020-02-15 Thread H.J. Lu
There is no need to set mode attribute to XImode since ix86_output_ssemov
can properly encode xmm16-xmm31 registers with and without AVX512VL.

gcc/

PR target/89229
* config/i386/i386.md (*movti_internal): Call ix86_output_ssemov
for TYPE_SSEMOV.  Remove ext_sse_reg_operand and TARGET_AVX512VL
check.

gcc/testsuite/

PR target/89229
* gcc.target/i386/pr89229-2a.c: New test.
* gcc.target/i386/pr89229-2b.c: Likewise.
* gcc.target/i386/pr89229-2c.c: Likewise.
---
 gcc/config/i386/i386.md| 28 +-
 gcc/testsuite/gcc.target/i386/pr89229-2a.c | 15 
 gcc/testsuite/gcc.target/i386/pr89229-2b.c | 13 ++
 gcc/testsuite/gcc.target/i386/pr89229-2c.c |  6 +
 4 files changed, 35 insertions(+), 27 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-2a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-2b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-2c.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 9e9b17d0913..5607d1ecddc 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1955,27 +1955,7 @@ (define_insn "*movti_internal"
   return standard_sse_constant_opcode (insn, operands);
 
 case TYPE_SSEMOV:
-  /* TDmode values are passed as TImode on the stack.  Moving them
-to stack may result in unaligned memory access.  */
-  if (misaligned_operand (operands[0], TImode)
- || misaligned_operand (operands[1], TImode))
-   {
- if (get_attr_mode (insn) == MODE_V4SF)
-   return "%vmovups\t{%1, %0|%0, %1}";
- else if (get_attr_mode (insn) == MODE_XI)
-   return "vmovdqu32\t{%1, %0|%0, %1}";
- else
-   return "%vmovdqu\t{%1, %0|%0, %1}";
-   }
-  else
-   {
- if (get_attr_mode (insn) == MODE_V4SF)
-   return "%vmovaps\t{%1, %0|%0, %1}";
- else if (get_attr_mode (insn) == MODE_XI)
-   return "vmovdqa32\t{%1, %0|%0, %1}";
- else
-   return "%vmovdqa\t{%1, %0|%0, %1}";
-   }
+  return ix86_output_ssemov (insn, operands);
 
 default:
   gcc_unreachable ();
@@ -2002,12 +1982,6 @@ (define_insn "*movti_internal"
(set (attr "mode")
(cond [(eq_attr "alternative" "0,1")
 (const_string "DI")
-  (ior (match_operand 0 "ext_sse_reg_operand")
-   (match_operand 1 "ext_sse_reg_operand"))
-(const_string "XI")
-  (and (eq_attr "alternative" "3")
-   (match_test "TARGET_AVX512VL"))
-(const_string "XI")
   (match_test "TARGET_AVX")
 (const_string "TI")
   (ior (not (match_test "TARGET_SSE2"))
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-2a.c 
b/gcc/testsuite/gcc.target/i386/pr89229-2a.c
new file mode 100644
index 000..0cf78039481
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-2a.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+typedef __int128 __m128t __attribute__ ((__vector_size__ (16),
+__may_alias__));
+
+__m128t
+foo1 (void)
+{
+  register __int128 xmm16 __asm ("xmm16") = (__int128) -1;
+  asm volatile ("" : "+v" (xmm16));
+  return (__m128t) xmm16;
+}
+
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-2b.c 
b/gcc/testsuite/gcc.target/i386/pr89229-2b.c
new file mode 100644
index 000..8d5d6c41d30
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-2b.c
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */
+
+typedef __int128 __m128t __attribute__ ((__vector_size__ (16),
+__may_alias__));
+
+__m128t
+foo1 (void)
+{
+  register __int128 xmm16 __asm ("xmm16") = (__int128) -1; /* { dg-error 
"register specified for 'xmm16'" } */
+  asm volatile ("" : "+v" (xmm16));
+  return (__m128t) xmm16;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-2c.c 
b/gcc/testsuite/gcc.target/i386/pr89229-2c.c
new file mode 100644
index 000..218da46dcd0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-2c.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */
+
+#include "pr89229-2a.c"
+
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
-- 
2.24.1



[PATCH 05/10] i386: Use ix86_output_ssemov for DImode TYPE_SSEMOV

2020-02-15 Thread H.J. Lu
There is no need to set mode attribute to XImode since ix86_output_ssemov
can properly encode xmm16-xmm31 registers with and without AVX512VL.

gcc/

PR target/89229
* config/i386/i386.md (*movdi_internal): Call ix86_output_ssemov
for TYPE_SSEMOV.  Remove ext_sse_reg_operand and TARGET_AVX512VL
check.

gcc/testsuite/

PR target/89229
* gcc.target/i386/pr89229-3a.c: New test.
* gcc.target/i386/pr89229-3b.c: Likewise.
* gcc.target/i386/pr89229-3c.c: Likewise.
---
 gcc/config/i386/i386.md| 31 ++
 gcc/testsuite/gcc.target/i386/pr89229-3a.c | 17 
 gcc/testsuite/gcc.target/i386/pr89229-3b.c |  6 +
 gcc/testsuite/gcc.target/i386/pr89229-3c.c |  7 +
 4 files changed, 32 insertions(+), 29 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-3a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-3b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-3c.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 5607d1ecddc..03d8078e957 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2054,31 +2054,7 @@ (define_insn "*movdi_internal"
   return standard_sse_constant_opcode (insn, operands);
 
 case TYPE_SSEMOV:
-  switch (get_attr_mode (insn))
-   {
-   case MODE_DI:
- /* Handle broken assemblers that require movd instead of movq.  */
- if (!HAVE_AS_IX86_INTERUNIT_MOVQ
- && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
-   return "%vmovd\t{%1, %0|%0, %1}";
- return "%vmovq\t{%1, %0|%0, %1}";
-
-   case MODE_TI:
- /* Handle AVX512 registers set.  */
- if (EXT_REX_SSE_REG_P (operands[0])
- || EXT_REX_SSE_REG_P (operands[1]))
-   return "vmovdqa64\t{%1, %0|%0, %1}";
- return "%vmovdqa\t{%1, %0|%0, %1}";
-
-   case MODE_V2SF:
- gcc_assert (!TARGET_AVX);
- return "movlps\t{%1, %0|%0, %1}";
-   case MODE_V4SF:
- return "%vmovaps\t{%1, %0|%0, %1}";
-
-   default:
- gcc_unreachable ();
-   }
+  return ix86_output_ssemov (insn, operands);
 
 case TYPE_SSECVT:
   if (SSE_REG_P (operands[0]))
@@ -2164,10 +2140,7 @@ (define_insn "*movdi_internal"
  (cond [(eq_attr "alternative" "2")
  (const_string "SI")
(eq_attr "alternative" "12,13")
- (cond [(ior (match_operand 0 "ext_sse_reg_operand")
- (match_operand 1 "ext_sse_reg_operand"))
-  (const_string "TI")
-(match_test "TARGET_AVX")
+ (cond [(match_test "TARGET_AVX")
   (const_string "TI")
 (ior (not (match_test "TARGET_SSE2"))
  (match_test "optimize_function_for_size_p (cfun)"))
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-3a.c 
b/gcc/testsuite/gcc.target/i386/pr89229-3a.c
new file mode 100644
index 000..cb9b071e873
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-3a.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */
+
+extern long long i;
+
+long long
+foo1 (void)
+{
+  register long long xmm16 __asm ("xmm16") = i;
+  asm volatile ("" : "+v" (xmm16));
+  register long long xmm17 __asm ("xmm17") = xmm16;
+  asm volatile ("" : "+v" (xmm17));
+  return xmm17;
+}
+
+/* { dg-final { scan-assembler-times 
"vmovdqa64\[^\n\r]*xmm1\[67]\[^\n\r]*xmm1\[67]" 1 } } */
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-3b.c 
b/gcc/testsuite/gcc.target/i386/pr89229-3b.c
new file mode 100644
index 000..9265fc0354b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-3b.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */
+
+#include "pr89229-3a.c"
+
+/* { dg-final { scan-assembler-times 
"vmovdqa32\[^\n\r]*zmm1\[67]\[^\n\r]*zmm1\[67]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-3c.c 
b/gcc/testsuite/gcc.target/i386/pr89229-3c.c
new file mode 100644
index 000..be0ca78a37e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-3c.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */
+
+#include "pr89229-3a.c"
+
+/* { dg-final { scan-assembler-times 
"vmovdqa64\[^\n\r]*xmm1\[67]\[^\n\r]*xmm1\[67]" 1 } } */
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
-- 
2.24.1



[PATCH 01/10] i386: Properly encode vector registers in vector move

2020-02-15 Thread H.J. Lu
On x86, when AVX and AVX512 are enabled, vector move instructions can
be encoded with either 2-byte/3-byte VEX (AVX) or 4-byte EVEX (AVX512):

   0:   c5 f9 6f d1 vmovdqa %xmm1,%xmm2
   4:   62 f1 fd 08 6f d1   vmovdqa64 %xmm1,%xmm2

We prefer VEX encoding over EVEX since VEX is shorter.  Also AVX512F
only supports 512-bit vector moves.  AVX512F + AVX512VL supports 128-bit
and 256-bit vector moves.  Mode attributes on x86 vector move patterns
indicate target preferences of vector move encoding.  For vector register
to vector register move, we can use 512-bit vector move instructions to
move 128-bit/256-bit vector if AVX512VL isn't available.  With AVX512F
and AVX512VL, we should use VEX encoding for 128-bit/256-bit vector moves
if upper 16 vector registers aren't used.  This patch adds a function,
ix86_output_ssemov, to generate vector moves:

1. If zmm registers are used, use EVEX encoding.
2. If xmm16-xmm31/ymm16-ymm31 registers aren't used, SSE or VEX encoding
will be generated.
3. If xmm16-xmm31/ymm16-ymm31 registers are used:
   a. With AVX512VL, AVX512VL vector moves will be generated.
   b. Without AVX512VL, xmm16-xmm31/ymm16-ymm31 register to register
  move will be done with zmm register move.

Tested on AVX2 and AVX512 with and without --with-arch=native.

gcc/

PR target/89229
PR target/89346
* config/i386/i386-protos.h (ix86_output_ssemov): New prototype.
* config/i386/i386.c (ix86_get_ssemov): New function.
(ix86_output_ssemov): Likewise.
* config/i386/sse.md (VMOVE:mov_internal): Call
ix86_output_ssemov for TYPE_SSEMOV.  Remove TARGET_AVX512VL
check.

gcc/testsuite/

PR target/89229
PR target/89346
* gcc.target/i386/avx512vl-vmovdqa64-1.c: Updated.
* gcc.target/i386/pr89346.c: New test.
---
 gcc/config/i386/i386-protos.h |   2 +
 gcc/config/i386/i386.c| 274 ++
 gcc/config/i386/sse.md|  98 +--
 .../gcc.target/i386/avx512vl-vmovdqa64-1.c|   7 +-
 gcc/testsuite/gcc.target/i386/pr89346.c   |  15 +
 5 files changed, 296 insertions(+), 100 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89346.c

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 266381ca5a6..39fcaa0ad5f 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -38,6 +38,8 @@ extern void ix86_expand_split_stack_prologue (void);
 extern void ix86_output_addr_vec_elt (FILE *, int);
 extern void ix86_output_addr_diff_elt (FILE *, int, int);
 
+extern const char *ix86_output_ssemov (rtx_insn *, rtx *);
+
 extern enum calling_abi ix86_cfun_abi (void);
 extern enum calling_abi ix86_function_type_abi (const_tree);
 
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index dac7a3fc5fd..26f8c9494b9 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -4915,6 +4915,280 @@ ix86_pre_reload_split (void)
  && !(cfun->curr_properties & PROP_rtl_split_insns));
 }
 
+/* Return the opcode of the TYPE_SSEMOV instruction.  To move from
+   or to xmm16-xmm31/ymm16-ymm31 registers, we either require
+   TARGET_AVX512VL or it is a register to register move which can
+   be done with zmm register move. */
+
+static const char *
+ix86_get_ssemov (rtx *operands, unsigned size,
+enum attr_mode insn_mode, machine_mode mode)
+{
+  char buf[128];
+  bool misaligned_p = (misaligned_operand (operands[0], mode)
+  || misaligned_operand (operands[1], mode));
+  bool evex_reg_p = (EXT_REX_SSE_REG_P (operands[0])
+|| EXT_REX_SSE_REG_P (operands[1]));
+  machine_mode scalar_mode;
+
+  const char *opcode = NULL;
+  enum
+{
+  opcode_int,
+  opcode_float,
+  opcode_double
+} type = opcode_int;
+
+  switch (insn_mode)
+{
+case MODE_V16SF:
+case MODE_V8SF:
+case MODE_V4SF:
+  scalar_mode = E_SFmode;
+  break;
+case MODE_V8DF:
+case MODE_V4DF:
+case MODE_V2DF:
+  scalar_mode = E_DFmode;
+  break;
+case MODE_XI:
+case MODE_OI:
+case MODE_TI:
+  scalar_mode = GET_MODE_INNER (mode);
+  break;
+default:
+  gcc_unreachable ();
+}
+
+  if (SCALAR_FLOAT_MODE_P (scalar_mode))
+{
+  switch (scalar_mode)
+   {
+   case E_SFmode:
+ if (size == 64 || !evex_reg_p || TARGET_AVX512VL)
+   opcode = misaligned_p ? "%vmovups" : "%vmovaps";
+ else
+   type = opcode_float;
+ break;
+   case E_DFmode:
+ if (size == 64 || !evex_reg_p || TARGET_AVX512VL)
+   opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
+ else
+   type = opcode_double;
+ break;
+   case E_TFmode:
+ if (size == 64)
+   opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+ else if (evex_reg_p)
+   {
+ if (TARGET_AVX512VL)
+   

[PATCH 08/10] i386: Use ix86_output_ssemov for DFmode TYPE_SSEMOV

2020-02-15 Thread H.J. Lu
There is no need to set mode attribute to XImode nor V8DFmode since
ix86_output_ssemov can properly encode xmm16-xmm31 registers with and
without AVX512VL.

gcc/

PR target/89229
* config/i386/i386.md (*movdf_internal): Call ix86_output_ssemov
for TYPE_SSEMOV.  Remove TARGET_AVX512F, TARGET_PREFER_AVX256,
TARGET_AVX512VL and ext_sse_reg_operand check.

gcc/testsuite/

PR target/89229
* gcc.target/i386/pr89229-6a.c: New test.
* gcc.target/i386/pr89229-6b.c: Likewise.
* gcc.target/i386/pr89229-6c.c: Likewise.
---
 gcc/config/i386/i386.md| 44 ++
 gcc/testsuite/gcc.target/i386/pr89229-6a.c | 16 
 gcc/testsuite/gcc.target/i386/pr89229-6b.c |  7 
 gcc/testsuite/gcc.target/i386/pr89229-6c.c |  6 +++
 4 files changed, 32 insertions(+), 41 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-6a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-6b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-6c.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index fdf0e5a8802..01892992adb 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -3307,37 +3307,7 @@ (define_insn "*movdf_internal"
   return standard_sse_constant_opcode (insn, operands);
 
 case TYPE_SSEMOV:
-  switch (get_attr_mode (insn))
-   {
-   case MODE_DF:
- if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
-   return "vmovsd\t{%d1, %0|%0, %d1}";
- return "%vmovsd\t{%1, %0|%0, %1}";
-
-   case MODE_V4SF:
- return "%vmovaps\t{%1, %0|%0, %1}";
-   case MODE_V8DF:
- return "vmovapd\t{%g1, %g0|%g0, %g1}";
-   case MODE_V2DF:
- return "%vmovapd\t{%1, %0|%0, %1}";
-
-   case MODE_V2SF:
- gcc_assert (!TARGET_AVX);
- return "movlps\t{%1, %0|%0, %1}";
-   case MODE_V1DF:
- gcc_assert (!TARGET_AVX);
- return "movlpd\t{%1, %0|%0, %1}";
-
-   case MODE_DI:
- /* Handle broken assemblers that require movd instead of movq.  */
- if (!HAVE_AS_IX86_INTERUNIT_MOVQ
- && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
-   return "%vmovd\t{%1, %0|%0, %1}";
- return "%vmovq\t{%1, %0|%0, %1}";
-
-   default:
- gcc_unreachable ();
-   }
+  return ix86_output_ssemov (insn, operands);
 
 default:
   gcc_unreachable ();
@@ -3391,10 +3361,7 @@ (define_insn "*movdf_internal"
 
   /* xorps is one byte shorter for non-AVX targets.  */
   (eq_attr "alternative" "12,16")
-(cond [(and (match_test "TARGET_AVX512F")
-(not (match_test "TARGET_PREFER_AVX256")))
- (const_string "XI")
-   (match_test "TARGET_AVX")
+(cond [(match_test "TARGET_AVX")
  (const_string "V2DF")
(ior (not (match_test "TARGET_SSE2"))
 (match_test "optimize_function_for_size_p (cfun)"))
@@ -3410,12 +3377,7 @@ (define_insn "*movdf_internal"
 
   /* movaps is one byte shorter for non-AVX targets.  */
   (eq_attr "alternative" "13,17")
-(cond [(and (ior (not (match_test "TARGET_PREFER_AVX256"))
- (not (match_test "TARGET_AVX512VL")))
-(ior (match_operand 0 "ext_sse_reg_operand")
- (match_operand 1 "ext_sse_reg_operand")))
- (const_string "V8DF")
-   (match_test "TARGET_AVX")
+(cond [(match_test "TARGET_AVX")
  (const_string "DF")
(ior (not (match_test "TARGET_SSE2"))
 (match_test "optimize_function_for_size_p (cfun)"))
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-6a.c 
b/gcc/testsuite/gcc.target/i386/pr89229-6a.c
new file mode 100644
index 000..5bc10d25619
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-6a.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+extern double d;
+
+void
+foo1 (double x)
+{
+  register double xmm16 __asm ("xmm16") = x;
+  asm volatile ("" : "+v" (xmm16));
+  register double xmm17 __asm ("xmm17") = xmm16;
+  asm volatile ("" : "+v" (xmm17));
+  d = xmm17;
+}
+
+/* { dg-final { scan-assembler-not "vmovapd" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-6b.c 
b/gcc/testsuite/gcc.target/i386/pr89229-6b.c
new file mode 100644
index 000..b248a3726f4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-6b.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */
+
+#include "pr89229-6a.c"
+
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
+/* { dg-fina

[PATCH 09/10] i386: Use ix86_output_ssemov for SFmode TYPE_SSEMOV

2020-02-15 Thread H.J. Lu
There is no need to set mode attribute to V16SFmode since ix86_output_ssemov
can properly encode xmm16-xmm31 registers with and without AVX512VL.

gcc/

PR target/89229
* config/i386/i386.md (*movsf_internal): Call ix86_output_ssemov
for TYPE_SSEMOV.  Remove TARGET_PREFER_AVX256, TARGET_AVX512VL
and ext_sse_reg_operand check.

gcc/testsuite/

PR target/89229
* gcc.target/i386/pr89229-7a.c: New test.
* gcc.target/i386/pr89229-7b.c: Likewise.
* gcc.target/i386/pr89229-7c.c: Likewise.
---
 gcc/config/i386/i386.md| 26 ++
 gcc/testsuite/gcc.target/i386/pr89229-7a.c | 16 +
 gcc/testsuite/gcc.target/i386/pr89229-7b.c |  6 +
 gcc/testsuite/gcc.target/i386/pr89229-7c.c |  6 +
 4 files changed, 30 insertions(+), 24 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-7a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-7b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-7c.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 01892992adb..2dcf2d598c3 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -3469,24 +3469,7 @@ (define_insn "*movsf_internal"
   return standard_sse_constant_opcode (insn, operands);
 
 case TYPE_SSEMOV:
-  switch (get_attr_mode (insn))
-   {
-   case MODE_SF:
- if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
-   return "vmovss\t{%d1, %0|%0, %d1}";
- return "%vmovss\t{%1, %0|%0, %1}";
-
-   case MODE_V16SF:
- return "vmovaps\t{%g1, %g0|%g0, %g1}";
-   case MODE_V4SF:
- return "%vmovaps\t{%1, %0|%0, %1}";
-
-   case MODE_SI:
- return "%vmovd\t{%1, %0|%0, %1}";
-
-   default:
- gcc_unreachable ();
-   }
+  return ix86_output_ssemov (insn, operands);
 
 case TYPE_MMXMOV:
   switch (get_attr_mode (insn))
@@ -3558,12 +3541,7 @@ (define_insn "*movsf_internal"
  better to maintain the whole registers in single format
  to avoid problems on using packed logical operations.  */
   (eq_attr "alternative" "6")
-(cond [(and (ior (not (match_test "TARGET_PREFER_AVX256"))
- (not (match_test "TARGET_AVX512VL")))
-(ior (match_operand 0 "ext_sse_reg_operand")
- (match_operand 1 "ext_sse_reg_operand")))
- (const_string "V16SF")
-   (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
+(cond [(ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
 (match_test "TARGET_SSE_SPLIT_REGS"))
  (const_string "V4SF")
   ]
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-7a.c 
b/gcc/testsuite/gcc.target/i386/pr89229-7a.c
new file mode 100644
index 000..856115b2f5a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-7a.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+extern float d;
+
+void
+foo1 (float x)
+{
+  register float xmm16 __asm ("xmm16") = x;
+  asm volatile ("" : "+v" (xmm16));
+  register float xmm17 __asm ("xmm17") = xmm16;
+  asm volatile ("" : "+v" (xmm17));
+  d = xmm17;
+}
+
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-7b.c 
b/gcc/testsuite/gcc.target/i386/pr89229-7b.c
new file mode 100644
index 000..93d1e43770c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-7b.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */
+
+#include "pr89229-7a.c"
+
+/* { dg-final { scan-assembler-times 
"vmovaps\[^\n\r]*zmm1\[67]\[^\n\r]*zmm1\[67]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-7c.c 
b/gcc/testsuite/gcc.target/i386/pr89229-7c.c
new file mode 100644
index 000..e37ff2bf5bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-7c.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */
+
+#include "pr89229-7a.c"
+
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
-- 
2.24.1



[PATCH 10/10] i386: Use ix86_output_ssemov for MMX TYPE_SSEMOV

2020-02-15 Thread H.J. Lu
There is no need to set mode attribute to XImode since ix86_output_ssemov
can properly encode xmm16-xmm31 registers with and without AVX512VL.

Remove ext_sse_reg_operand since it is no longer needed.

PR target/89229
* config/i386/mmx.md (MMXMODE:*mov_internal): Call
ix86_output_ssemov for TYPE_SSEMOV.  Remove ext_sse_reg_operand
check.
* config/i386/predicates.md (ext_sse_reg_operand): Removed.
---
 gcc/config/i386/mmx.md| 29 ++---
 gcc/config/i386/predicates.md |  5 -
 2 files changed, 2 insertions(+), 32 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index f695831b5b9..7d9db5d352c 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -118,29 +118,7 @@ (define_insn "*mov_internal"
   return standard_sse_constant_opcode (insn, operands);
 
 case TYPE_SSEMOV:
-  switch (get_attr_mode (insn))
-   {
-   case MODE_DI:
- /* Handle broken assemblers that require movd instead of movq.  */
- if (!HAVE_AS_IX86_INTERUNIT_MOVQ
- && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])))
-   return "%vmovd\t{%1, %0|%0, %1}";
- return "%vmovq\t{%1, %0|%0, %1}";
-   case MODE_TI:
- return "%vmovdqa\t{%1, %0|%0, %1}";
-   case MODE_XI:
- return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
-
-   case MODE_V2SF:
- if (TARGET_AVX && REG_P (operands[0]))
-   return "vmovlps\t{%1, %0, %0|%0, %0, %1}";
- return "%vmovlps\t{%1, %0|%0, %1}";
-   case MODE_V4SF:
- return "%vmovaps\t{%1, %0|%0, %1}";
-
-   default:
- gcc_unreachable ();
-   }
+  return ix86_output_ssemov (insn, operands);
 
 default:
   gcc_unreachable ();
@@ -189,10 +167,7 @@ (define_insn "*mov_internal"
  (cond [(eq_attr "alternative" "2")
  (const_string "SI")
(eq_attr "alternative" "11,12")
- (cond [(ior (match_operand 0 "ext_sse_reg_operand")
- (match_operand 1 "ext_sse_reg_operand"))
-   (const_string "XI")
-(match_test "mode == V2SFmode")
+ (cond [(match_test "mode == V2SFmode")
   (const_string "V4SF")
 (ior (not (match_test "TARGET_SSE2"))
  (match_test "optimize_function_for_size_p (cfun)"))
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 1119366d54e..71f4cb1193c 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -61,11 +61,6 @@ (define_predicate "sse_reg_operand"
   (and (match_code "reg")
(match_test "SSE_REGNO_P (REGNO (op))")))
 
-;; True if the operand is an AVX-512 new register.
-(define_predicate "ext_sse_reg_operand"
-  (and (match_code "reg")
-   (match_test "EXT_REX_SSE_REGNO_P (REGNO (op))")))
-
 ;; Return true if op is a QImode register.
 (define_predicate "any_QIreg_operand"
   (and (match_code "reg")
-- 
2.24.1



[PATCH 07/10] i386: Use ix86_output_ssemov for TFmode TYPE_SSEMOV

2020-02-15 Thread H.J. Lu
gcc/

PR target/89229
* config/i386/i386.md (*movtf_internal): Call ix86_output_ssemov
for TYPE_SSEMOV.

gcc/testsuite/

PR target/89229
* gcc.target/i386/pr89229-5a.c: New test.
* gcc.target/i386/pr89229-5b.c: Likewise.
* gcc.target/i386/pr89229-5c.c: Likewise.
---
 gcc/config/i386/i386.md| 26 +-
 gcc/testsuite/gcc.target/i386/pr89229-5a.c | 16 +
 gcc/testsuite/gcc.target/i386/pr89229-5b.c | 12 ++
 gcc/testsuite/gcc.target/i386/pr89229-5c.c |  6 +
 4 files changed, 35 insertions(+), 25 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-5a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-5b.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-5c.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 05815c5cf3b..fdf0e5a8802 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -3154,31 +3154,7 @@ (define_insn "*movtf_internal"
   return standard_sse_constant_opcode (insn, operands);
 
 case TYPE_SSEMOV:
-  /* Handle misaligned load/store since we
- don't have movmisaligntf pattern. */
-  if (misaligned_operand (operands[0], TFmode)
- || misaligned_operand (operands[1], TFmode))
-   {
- if (get_attr_mode (insn) == MODE_V4SF)
-   return "%vmovups\t{%1, %0|%0, %1}";
- else if (TARGET_AVX512VL
-  && (EXT_REX_SSE_REG_P (operands[0])
-  || EXT_REX_SSE_REG_P (operands[1])))
-   return "vmovdqu64\t{%1, %0|%0, %1}";
- else
-   return "%vmovdqu\t{%1, %0|%0, %1}";
-   }
-  else
-   {
- if (get_attr_mode (insn) == MODE_V4SF)
-   return "%vmovaps\t{%1, %0|%0, %1}";
- else if (TARGET_AVX512VL
-  && (EXT_REX_SSE_REG_P (operands[0])
-  || EXT_REX_SSE_REG_P (operands[1])))
-   return "vmovdqa64\t{%1, %0|%0, %1}";
- else
-   return "%vmovdqa\t{%1, %0|%0, %1}";
-   }
+  return ix86_output_ssemov (insn, operands);
 
 case TYPE_MULTI:
return "#";
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-5a.c 
b/gcc/testsuite/gcc.target/i386/pr89229-5a.c
new file mode 100644
index 000..fcb85c366b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-5a.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512" } */
+
+extern __float128 d;
+
+void
+foo1 (__float128 x)
+{
+  register __float128 xmm16 __asm ("xmm16") = x;
+  asm volatile ("" : "+v" (xmm16));
+  register __float128 xmm17 __asm ("xmm17") = xmm16;
+  asm volatile ("" : "+v" (xmm17));
+  d = xmm17;
+}
+
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-5b.c 
b/gcc/testsuite/gcc.target/i386/pr89229-5b.c
new file mode 100644
index 000..37eb83c783b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-5b.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */
+
+extern __float128 d;
+
+void
+foo1 (__float128 x)
+{
+  register __float128 xmm16 __asm ("xmm16") = x; /* { dg-error "register 
specified for 'xmm16'" } */
+  asm volatile ("" : "+v" (xmm16));
+  d = xmm16;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-5c.c 
b/gcc/testsuite/gcc.target/i386/pr89229-5c.c
new file mode 100644
index 000..529a520133c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89229-5c.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */
+
+#include "pr89229-5a.c"
+
+/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */
-- 
2.24.1



Re: [PATCH 1/3] libstdc++: Fold some ranges algo subroutines into their only caller

2020-02-15 Thread Patrick Palka
On Sat, 15 Feb 2020, Jonathan Wakely wrote:

> On 14/02/20 10:35 -0500, Patrick Palka wrote:
> > These subroutines have only a single call site, so it might be best and
> > simplest
> > to eliminate them before we convert the algos into function objects.
> > 
> > libstdc++-v3/ChangeLog:
> > 
> > * include/bits/ranges_algo.h (ranges::__find_end): Fold into ...
> > (ranges::find_end): ... here.
> > (ranges::__lexicographical_compare): Fold into ...
> > (ranges::lexicographical_compare): ... here.
> > * include/bits/ranges_algobase.h (ranges::__equal): Fold into ...
> > (ranges::equal): ... here.
> 
> OK for master, but please note the two comments below.
> 
> 
> > libstdc++-v3/include/bits/ranges_algo.h | 104 
> > libstdc++-v3/include/bits/ranges_algobase.h |  33 +++
> > 2 files changed, 55 insertions(+), 82 deletions(-)
> > 
> > diff --git a/libstdc++-v3/include/bits/ranges_algo.h
> > b/libstdc++-v3/include/bits/ranges_algo.h
> > index 84a02cabb80..6b6f4defdf5 100644
> > --- a/libstdc++-v3/include/bits/ranges_algo.h
> > +++ b/libstdc++-v3/include/bits/ranges_algo.h
> > @@ -513,40 +513,7 @@ namespace ranges
> >   std::move(__pred), std::move(__proj));
> > }
> > 
> > -  template _Sent1,
> > -  forward_iterator _Iter2, sentinel_for<_Iter2> _Sent2,
> > -  typename _Pred = ranges::equal_to,
> > -  typename _Proj1 = identity, typename _Proj2 = identity>
> > -requires indirectly_comparable<_Iter1, _Iter2, _Pred, _Proj1, _Proj2>
> > -constexpr subrange<_Iter1>
> > -__find_end(_Iter1 __first1, _Sent1 __last1,
> > -  _Iter2 __first2, _Sent2 __last2,
> > -  _Pred __pred, _Proj1 __proj1, _Proj2 __proj2)
> > -{
> > -  auto __i = ranges::next(__first1, __last1);
> > -  if (__first2 == __last2)
> > -   return {__i, __i};
> > 
> > -  auto __result_begin = __i;
> > -  auto __result_end = __i;
> > -  for (;;)
> > -   {
> > - auto __new_range = ranges::search(__first1, __last1,
> > -   __first2, __last2,
> > -   __pred, __proj1, __proj2);
> > - auto __new_result_begin = ranges::begin(__new_range);
> > - auto __new_result_end = ranges::end(__new_range);
> > - if (__new_result_begin == __last1)
> > -   return {__result_begin, __result_end};
> > - else
> > -   {
> > - __result_begin = __new_result_begin;
> > - __result_end = __new_result_end;
> > - __first1 = __result_begin;
> > - ++__first1;
> > -   }
> > -   }
> > -}
> > 
> >   template _Sent1,
> >forward_iterator _Iter2, sentinel_for<_Iter2> _Sent2,
> > @@ -578,9 +545,31 @@ namespace ranges
> > return {__result_first, __result_last};
> > }
> >   else
> > -   return ranges::__find_end(__first1, __last1, __first2, __last2,
> > - std::move(__pred),
> > - std::move(__proj1), std::move(__proj2));
> > +   {
> > + auto __i = ranges::next(__first1, __last1);
> > + if (__first2 == __last2)
> > +   return {__i, __i};
> > +
> > + auto __result_begin = __i;
> > + auto __result_end = __i;
> > + for (;;)
> > +   {
> > + auto __new_range = ranges::search(__first1, __last1,
> > +   __first2, __last2,
> > +   __pred, __proj1, __proj2);
> > + auto __new_result_begin = ranges::begin(__new_range);
> > + auto __new_result_end = ranges::end(__new_range);
> > + if (__new_result_begin == __last1)
> > +   return {__result_begin, __result_end};
> > + else
> > +   {
> > + __result_begin = __new_result_begin;
> > + __result_end = __new_result_end;
> > + __first1 = __result_begin;
> > + ++__first1;
> > +   }
> > +   }
> > +   }
> > }
> > 
> >   template > @@ -2908,14 +2897,26 @@ namespace ranges
> > 
> >   template _Sent1,
> >input_iterator _Iter2, sentinel_for<_Iter2> _Sent2,
> > -  typename _Proj1, typename _Proj2,
> > +  typename _Proj1 = identity, typename _Proj2 = identity,
> >indirect_strict_weak_order,
> > - projected<_Iter2, _Proj2>> _Comp>
> > + projected<_Iter2, _Proj2>>
> > +_Comp = ranges::less>
> > constexpr bool
> > -__lexicographical_compare(_Iter1 __first1, _Sent1 __last1,
> > - _Iter2 __first2, _Sent2 __last2,
> > - _Comp __comp, _Proj1 __proj1, _Proj2 __proj2)
> > +lexicographical_compare(_Iter1 __first1, _Sent1 __last1,
> > +   _Iter2 __first2, _Sent2 __last2,
> > +   _Comp __comp = {},
> > +   _Proj1 __proj1 = {}, _Proj2 __proj2 = {})
> > {
> > +  if constexpr (__detail::__is_normal_i

[PATCH] libstdc++: Move code after an early exit constexpr if to under an else branch

2020-02-15 Thread Patrick Palka
This avoids instantiating dead code when the true branch of the constexpr if is
taken.

[ diffstat generated with -w to ignore noisy whitespace changes ]

libstdc++-v3/ChangeLog:

* include/bits/ranges_algo.h (__lexicographical_compare_fn::operator()):
Move code after an early exit constexpr if to under an else branch.
* include/bits/ranges_algobase.h (__equal_fn::operator()): Likewise.
---
 libstdc++-v3/include/bits/ranges_algo.h | 7 +--
 libstdc++-v3/include/bits/ranges_algobase.h | 7 ++-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/libstdc++-v3/include/bits/ranges_algo.h 
b/libstdc++-v3/include/bits/ranges_algo.h
index 7f8f0fb964b..ff1b40f6ace 100644
--- a/libstdc++-v3/include/bits/ranges_algo.h
+++ b/libstdc++-v3/include/bits/ranges_algo.h
@@ -3318,7 +3318,8 @@ namespace ranges
 std::__niter_base(std::move(__last2)),
 std::move(__comp),
 std::move(__proj1), std::move(__proj2));
-
+   else
+ {
constexpr bool __sized_iters
  = (sized_sentinel_for<_Sent1, _Iter1>
 && sized_sentinel_for<_Sent2, _Iter2>);
@@ -3342,7 +3343,8 @@ namespace ranges
  {
if (const auto __len = std::min(__d1, __d2))
  {
-   const auto __c = std::__memcmp(__first1, __first2, __len);
+   const auto __c
+ = std::__memcmp(__first1, __first2, __len);
if constexpr (is_same_v<_Comp, ranges::less>)
  {
if (__c < 0)
@@ -3378,6 +3380,7 @@ namespace ranges
  }
return __first1 == __last1 && __first2 != __last2;
  }
+  }
 
 template
-&& sized_sentinel_for<_Sent2, _Iter2>);
-   if constexpr (__sized_iters)
+   else if constexpr (sized_sentinel_for<_Sent1, _Iter1>
+  && sized_sentinel_for<_Sent2, _Iter2>)
  {
auto __d1 = ranges::distance(__first1, __last1);
auto __d2 = ranges::distance(__first2, __last2);
-- 
2.25.0.232.gd8437c57fa



libgo patch committed: On 32-bit systems, limit default GOMAXPROCS to 32

2020-02-15 Thread Ian Lance Taylor
This libgo patch limits the default value of GOMAXPROCS to 32 on
32-bit systems.  Otherwise we can easily run out of stack space for
threads.  The user can still override by setting GOMAXPROCS.
Bootstrapped and ran Go testsuite on x86_64-pc-linux-gnu.  Committed
to mainline.

Ian
a339c239a7ed8af25eb612ea4ceb5d975528b951
diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE
index 27f4ce342e5..9916b02c57f 100644
--- a/gcc/go/gofrontend/MERGE
+++ b/gcc/go/gofrontend/MERGE
@@ -1,4 +1,4 @@
-c94637ad6fd38d4814fb02d094a1a73f19323d71
+3e46519cee5c916a9b39480fbac13f4ffc6a93b0
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go
index c0e85773098..e3f934ae7bd 100644
--- a/libgo/go/runtime/proc.go
+++ b/libgo/go/runtime/proc.go
@@ -563,6 +563,14 @@ func schedinit() {
 
sched.lastpoll = uint64(nanotime())
procs := ncpu
+
+   // In 32-bit mode, we can burn a lot of memory on thread stacks.
+   // Try to avoid this by limiting the number of threads we run
+   // by default.
+   if sys.PtrSize == 4 && procs > 32 {
+   procs = 32
+   }
+
if n, ok := atoi32(gogetenv("GOMAXPROCS")); ok && n > 0 {
procs = n
}


libgo patch committed: Update to 1.14rc1

2020-02-15 Thread Ian Lance Taylor
I've committed a patch to update libgo to the 1.14rc1 release (this is
a release candidate for the 1.14 Go release).  Bootstrapped and ran Go
testsuite on x86_64-pc-linux-gnu.  Committed to mainline.

Ian


patch.txt.bz2
Description: application/bzip


Re: [RFC PATCH v0] PPC64: Implement POWER Architecture Vector Function ABI.

2020-02-15 Thread GT
‐‐‐ Original Message ‐‐‐
On Friday, February 14, 2020 6:46 PM, Segher Boessenkool 
 wrote:

> On Fri, Feb 14, 2020 at 08:24:30PM +, GT wrote:
>
> > Function rs6000_simd_clone_adjust, even though its body is empty,
> > cannot simply be removed. I tried it. It resulted in ICE. In my
> > view, leaving it empty is preferable to modifying other files
> > unrelated to rs6000.c in order to avoid having a function whose
> > body is empty.
>
> Please Cc: the rs6000 maintainers on rs6000 patches, you will get a
> reply faster, and more reliably.
>

File MAINTAINERS has you, David Edelsohn and Aldy Hernandez listed as 
maintainers
of various rs6000 aspects. Is that who you say I should "Cc:" or is there a
separate mailing list for rs6000?

> Please don't use binary attachments, it takes effort to reply to those.
>

I have not been able to configure protonmail for either git imap-send or 
send-email.
Will try pasting the .patch inline as plain text and see if that works.

Bert.


Re: [PATCH] libstdc++: Move code after an early exit constexpr if to under an else branch

2020-02-15 Thread Jonathan Wakely

On 15/02/20 11:28 -0500, Patrick Palka wrote:

This avoids instantiating dead code when the true branch of the constexpr if is
taken.

[ diffstat generated with -w to ignore noisy whitespace changes ]

libstdc++-v3/ChangeLog:

* include/bits/ranges_algo.h (__lexicographical_compare_fn::operator()):
Move code after an early exit constexpr if to under an else branch.
* include/bits/ranges_algobase.h (__equal_fn::operator()): Likewise.


OK for master, thanks!




[committed] c++: Add -std=c++20.

2020-02-15 Thread Jason Merrill
It's probably past time for this, but definitely now that we're done with
the final committee meeting of C++20.  This patch only adds the option and
adjusts the testsuite to recognize it; more extensive changes can wait for
the published standard.

Tested x86_64-pc-linux-gnu, applying to trunk.

gcc/ChangeLog
2020-02-15  Jason Merrill  

* doc/invoke.texi (C Dialect Options): Add -std=c++20.

gcc/c-family/ChangeLog
2020-02-15  Jason Merrill  

* c.opt: Add -std=c++20.

gcc/testsuite/ChangeLog
2020-02-15  Jason Merrill  

* lib/target-supports.exp (check_effective_target_c++2a_only): Also
look for -std=*++20.
(check_effective_target_concepts): Use check_effective_target_c++2a.
---
 gcc/doc/invoke.texi   | 10 ++
 gcc/c-family/c.opt|  6 +-
 gcc/testsuite/lib/target-supports.exp |  9 ++---
 3 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 597151670be..3e47d06f0d5 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -2078,13 +2078,15 @@ The name @samp{c++1z} is deprecated.
 GNU dialect of @option{-std=c++17}.
 The name @samp{gnu++1z} is deprecated.
 
-@item c++2a
-The next revision of the ISO C++ standard, tentatively planned for
+@item c++20
+@itemx c++2a
+The next revision of the ISO C++ standard, planned for
 2020.  Support is highly experimental, and will almost certainly
 change in incompatible ways in future releases.
 
-@item gnu++2a
-GNU dialect of @option{-std=c++2a}.  Support is highly experimental,
+@item gnu++20
+@itemx gnu++2a
+GNU dialect of @option{-std=c++20}.  Support is highly experimental,
 and will almost certainly change in incompatible ways in future
 releases.
 @end table
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 814ed17f7c4..b7e4fe146b2 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -2071,7 +2071,11 @@ Conform to the ISO 2017 C++ standard.
 
 std=c++2a
 C++ ObjC++
-Conform to the ISO 2020(?) C++ draft standard (experimental and incomplete support).
+Conform to the ISO 2020 C++ draft standard (experimental and incomplete support).
+
+std=c++20
+C++ ObjC++ Alias(std=c++2a)
+Conform to the ISO 2020 C++ draft standard (experimental and incomplete support).
 
 std=c11
 C ObjC
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index fb177c54aa8..ec462315860 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -8806,7 +8806,7 @@ proc check_effective_target_c++2a_only { } {
 if ![check_effective_target_c++] {
return 0
 }
-if [check-flags { { } { } { -std=c++2a -std=gnu++2a } }] {
+if [check-flags { { } { } { -std=c++2a -std=gnu++2a -std=c++20 -std=gnu++20 } }] {
return 1
 }
 if { $cxx_default == "c++20" && [check-flags { { } { } { } { -std=* } }] } {
@@ -8818,9 +8818,12 @@ proc check_effective_target_c++2a { } {
 return [check_effective_target_c++2a_only]
 }
 
-# Check for C++ Concepts TS support, i.e. -fconcepts flag.
+# Check for C++ Concepts support, i.e. -fconcepts flag.
 proc check_effective_target_concepts { } {
-return [check-flags { "" { } { -fconcepts -std=*2a } }]
+if [check_effective_target_c++2a] {
+   return 1
+}
+return [check-flags { "" { } { -fconcepts } }]
 }
 
 # Return 1 if expensive testcases should be run.

base-commit: 0b3c2eed35d608d6541ecf004a9576b4eae0b4ef
-- 
2.18.1



libbacktrace patch committed: Update test file

2020-02-15 Thread Ian Lance Taylor
This libbacktrace patch updates the test file used for comparisons
with zlib.  The file that the test was previously using, from libgo,
no longer exists.  Use its replacement file instead.  Bootstrapped and
ran libbacktrace tests on x86_64-pc-linux-gnu.  Committed to mainline.

Ian

2020-02-15  Ian Lance Taylor  

* ztest.c (test_large): Update file to current libgo test file.
diff --git a/libbacktrace/ztest.c b/libbacktrace/ztest.c
index 40f9c389a2a..2663c90061a 100644
--- a/libbacktrace/ztest.c
+++ b/libbacktrace/ztest.c
@@ -315,8 +315,8 @@ test_large (struct backtrace_state *state)
   size_t ctimes[16];
   size_t ztimes[16];
   static const char * const names[] = {
-"Mark.Twain-Tom.Sawyer.txt",
-"../libgo/go/compress/testdata/Mark.Twain-Tom.Sawyer.txt"
+"Isaac.Newton-Opticks.txt",
+"../libgo/go/testdata/Isaac.Newton-Opticks.txt",
   };
 
   orig_buf = NULL;