[PATCH] libstdc++: Implement LWG 3150 for std::uniform_random_bit_generator
* include/bits/random.h (uniform_random_bit_generator): Require min() and max() to be constant expressions and min() to be less than max(). * testsuite/26_numerics/random/concept.cc: Check additional cases. * testsuite/26_numerics/random/pr60037-neg.cc: Adjust dg-error lineno. Tested powerpc64le-linux, committed to master. commit 5b1d588509551291f4028497858ee9e04ce0bdee Author: Jonathan Wakely Date: Sat Feb 15 08:58:43 2020 +0000 libstdc++: Implement LWG 3150 for std::uniform_random_bit_generator * include/bits/random.h (uniform_random_bit_generator): Require min() and max() to be constant expressions and min() to be less than max(). * testsuite/26_numerics/random/concept.cc: Check additional cases. * testsuite/26_numerics/random/pr60037-neg.cc: Adjust dg-error lineno. diff --git a/libstdc++-v3/include/bits/random.h b/libstdc++-v3/include/bits/random.h index 3eefdefc96f..d4aebf45af0 100644 --- a/libstdc++-v3/include/bits/random.h +++ b/libstdc++-v3/include/bits/random.h @@ -60,6 +60,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { { _Gen::min() } -> same_as<invoke_result_t<_Gen&>>; { _Gen::max() } -> same_as<invoke_result_t<_Gen&>>; + requires bool_constant<(_Gen::min() < _Gen::max())>::value; }; #endif diff --git a/libstdc++-v3/testsuite/26_numerics/random/concept.cc b/libstdc++-v3/testsuite/26_numerics/random/concept.cc index 69a7fc1e674..cb1ea882e16 100644 --- a/libstdc++-v3/testsuite/26_numerics/random/concept.cc +++ b/libstdc++-v3/testsuite/26_numerics/random/concept.cc @@ -219,3 +219,30 @@ struct N11 }; static_assert( ! std::uniform_random_bit_generator<N11> ); + +struct N12 +{ + unsigned operator()(); + static unsigned min() { return 0; } // not constexpr + static constexpr unsigned max() { return 1; } +}; + +static_assert( ! std::uniform_random_bit_generator<N12> ); // LWG 3150 + +struct N13 +{ + unsigned operator()(); + static constexpr unsigned min() { return 0; } + static unsigned max() { return 1; } // not constexpr +}; + +static_assert( ! 
std::uniform_random_bit_generator<N13> ); // LWG 3150 + +struct N14 +{ + unsigned operator()(); + static constexpr unsigned min() { return 1; } + static constexpr unsigned max() { return 0; } // max not greater than min +}; + +static_assert( ! std::uniform_random_bit_generator<N14> ); // LWG 3150 diff --git a/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc b/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc index 201b87e9c52..91e5566c54a 100644 --- a/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc +++ b/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc @@ -10,6 +10,6 @@ std::__detail::_Adaptor<std::mt19937, unsigned long> aurng(urng); auto x = std::generate_canonical<std::size_t, std::numeric_limits<float>::digits>(urng); -// { dg-error "static assertion failed: template argument must be a floating point type" "" { target *-*-* } 171 } +// { dg-error "static assertion failed: template argument must be a floating point type" "" { target *-*-* } 172 } // { dg-error "static assertion failed: template argument must be a floating point type" "" { target *-*-* } 3281 }
[PATCH] libstdc++: Update __cpp_lib_erase_if macro (P1115R3)
Now that this feature has been approved for C++20 we can define the macro to the official value. * include/bits/erase_if.h (__cpp_lib_erase_if): Define to 202002L. * include/std/deque: Likewise. * include/std/forward_list: Likewise. * include/std/list: Likewise. * include/std/string: Likewise. * include/std/vector: Likewise. * include/std/version: Likewise. * testsuite/23_containers/deque/erasure.cc: Test for new value. * testsuite/23_containers/forward_list/erasure.cc: Likewise. * testsuite/23_containers/list/erasure.cc: Likewise. * testsuite/23_containers/map/erasure.cc: Likewise. * testsuite/23_containers/set/erasure.cc: Likewise. * testsuite/23_containers/unordered_map/erasure.cc: Likewise. * testsuite/23_containers/unordered_set/erasure.cc: Likewise. * testsuite/23_containers/vector/erasure.cc: Likewise. Tested powerpc64le-linux, committed to master. commit 55b00d14f4daf671b865550c119dafdeb3139672 Author: Jonathan Wakely Date: Sat Feb 15 09:02:30 2020 + libstdc++: Update __cpp_lib_erase_if macro (P1115R3) Now that this feature has been approved for C++20 we can define the macro to the official value. * include/bits/erase_if.h (__cpp_lib_erase_if): Define to 202002L. * include/std/deque: Likewise. * include/std/forward_list: Likewise. * include/std/list: Likewise. * include/std/string: Likewise. * include/std/vector: Likewise. * include/std/version: Likewise. * testsuite/23_containers/deque/erasure.cc: Test for new value. * testsuite/23_containers/forward_list/erasure.cc: Likewise. * testsuite/23_containers/list/erasure.cc: Likewise. * testsuite/23_containers/map/erasure.cc: Likewise. * testsuite/23_containers/set/erasure.cc: Likewise. * testsuite/23_containers/unordered_map/erasure.cc: Likewise. * testsuite/23_containers/unordered_set/erasure.cc: Likewise. * testsuite/23_containers/vector/erasure.cc: Likewise. 
diff --git a/libstdc++-v3/include/bits/erase_if.h b/libstdc++-v3/include/bits/erase_if.h index 2641fe63ba3..dc5e6ae584f 100644 --- a/libstdc++-v3/include/bits/erase_if.h +++ b/libstdc++-v3/include/bits/erase_if.h @@ -39,7 +39,7 @@ namespace std _GLIBCXX_BEGIN_NAMESPACE_VERSION #if __cplusplus > 201703L -# define __cpp_lib_erase_if 201900L +# define __cpp_lib_erase_if 202002L #endif namespace __detail diff --git a/libstdc++-v3/include/std/deque b/libstdc++-v3/include/std/deque index efe04dbbb7c..07e2c69d02d 100644 --- a/libstdc++-v3/include/std/deque +++ b/libstdc++-v3/include/std/deque @@ -91,7 +91,7 @@ namespace std _GLIBCXX_VISIBILITY(default) { _GLIBCXX_BEGIN_NAMESPACE_VERSION -#define __cpp_lib_erase_if 201900L +#define __cpp_lib_erase_if 202002L template inline typename deque<_Tp, _Alloc>::size_type diff --git a/libstdc++-v3/include/std/forward_list b/libstdc++-v3/include/std/forward_list index 8eb1ad150a7..439ef6b380c 100644 --- a/libstdc++-v3/include/std/forward_list +++ b/libstdc++-v3/include/std/forward_list @@ -62,7 +62,7 @@ namespace std _GLIBCXX_VISIBILITY(default) { _GLIBCXX_BEGIN_NAMESPACE_VERSION -#define __cpp_lib_erase_if 201900L +#define __cpp_lib_erase_if 202002L template inline typename forward_list<_Tp, _Alloc>::size_type diff --git a/libstdc++-v3/include/std/list b/libstdc++-v3/include/std/list index 1fb51a5abb4..7b3d3c2e888 100644 --- a/libstdc++-v3/include/std/list +++ b/libstdc++-v3/include/std/list @@ -86,7 +86,7 @@ namespace std _GLIBCXX_VISIBILITY(default) { _GLIBCXX_BEGIN_NAMESPACE_VERSION -#define __cpp_lib_erase_if 201900L +#define __cpp_lib_erase_if 202002L template inline typename list<_Tp, _Alloc>::size_type diff --git a/libstdc++-v3/include/std/string b/libstdc++-v3/include/std/string index 6a3b61f8622..a95ef4dd541 100644 --- a/libstdc++-v3/include/std/string +++ b/libstdc++-v3/include/std/string @@ -118,7 +118,7 @@ namespace std _GLIBCXX_VISIBILITY(default) { _GLIBCXX_BEGIN_NAMESPACE_VERSION -#define __cpp_lib_erase_if 201900L 
+#define __cpp_lib_erase_if 202002L template diff --git a/libstdc++-v3/include/std/vector b/libstdc++-v3/include/std/vector index 2a7b4c0f629..6f587a82a09 100644 --- a/libstdc++-v3/include/std/vector +++ b/libstdc++-v3/include/std/vector @@ -101,7 +101,7 @@ namespace std _GLIBCXX_VISIBILITY(default) { _GLIBCXX_BEGIN_NAMESPACE_VERSION -#define __cpp_lib_erase_if 201900L +#define __cpp_lib_erase_if 202002L template inline typename vector<_Tp, _Alloc>::size_type diff --git a/libstdc++-v3/include/std/version b/libstdc++-v3/include/std/version index d8a97767453..8a4affa60a0 100644 --- a/libstdc++-v3/include/std/version +++ b/libstdc++-v3/include/std/version @@ -184,7 +184,7 @@ #define __cpp_lib_constexpr_complex 201711L #define __cpp_l
Re: [PATCH] match.pd: Disallow side-effects in GENERIC for non-COND_EXPR to COND_EXPR simplifications [PR93744]
On February 15, 2020 7:09:51 AM GMT+01:00, Jakub Jelinek wrote: >Hi! > >As the following testcases show (the first one reported, last two >found by code inspection), we need to disallow side-effects >in simplifications that turn some unconditional expression into >conditional >one. From my little understanding of genmatch.c, it is able to >automatically disallow side effects if the same operand is used >multiple >times in the match pattern, maybe if it is used multiple times in the >replacement pattern, and if it is used in conditional contexts in the >match >pattern, could it be taught to handle this case too? If yes, perhaps >just the first hunk could be usable for 8/9 backports (+ the >testcases). It could possibly be done but then it's only three cases so far. OK. Richard. >Bootstrapped/regtested on x86_64-linux and i686-linux. > >2020-02-15 Jakub Jelinek > > PR tree-optimization/93744 > * match.pd (((m1 >/</>=/<= m2) * d -> (m1 >/</>=/<= m2) ? d : 0, > A - ((A - B) & -(C cmp D)) -> (C cmp D) ? B : A, > A + ((B - A) & -(C cmp D)) -> (C cmp D) ? B : A): For GENERIC, make > sure @2 in the first and @1 in the other patterns has no side-effects. > > * gcc.c-torture/execute/pr93744-1.c: New test. > * gcc.c-torture/execute/pr93744-2.c: New test. > * gcc.c-torture/execute/pr93744-3.c: New test. > >--- gcc/match.pd.jj 2020-02-05 11:12:33.679383217 +0100 >+++ gcc/match.pd 2020-02-14 22:49:22.858771394 +0100 >@@ -1472,7 +1472,8 @@ (define_operator_list COND_TERNARY > (for cmp (gt lt ge le) > (simplify > (mult (convert (cmp @0 @1)) @2) >- (cond (cmp @0 @1) @2 { build_zero_cst (type); }))) >+ (if (GIMPLE || !TREE_SIDE_EFFECTS (@2)) >+ (cond (cmp @0 @1) @2 { build_zero_cst (type); })))) > > /* For integral types with undefined overflow and C != 0 fold >x * C EQ/NE y * C into x EQ/NE y. 
*/ >@@ -2709,7 +2710,8 @@ (define_operator_list COND_TERNARY >&& TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE >&& INTEGRAL_TYPE_P (TREE_TYPE (@5)) >&& (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type) >- || !TYPE_UNSIGNED (TREE_TYPE (@4)))) >+ || !TYPE_UNSIGNED (TREE_TYPE (@4))) >+ && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) >(cond (cmp @2 @3) @1 @0))) > (simplify > (plus:c @0 (bit_and:c (minus @1 @0) >@@ -2719,7 +2721,8 @@ (define_operator_list COND_TERNARY >&& TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE >&& INTEGRAL_TYPE_P (TREE_TYPE (@5)) >&& (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type) >- || !TYPE_UNSIGNED (TREE_TYPE (@4)))) >+ || !TYPE_UNSIGNED (TREE_TYPE (@4))) >+ && (GIMPLE || !TREE_SIDE_EFFECTS (@1))) >(cond (cmp @2 @3) @1 @0)))) > > /* Simplifications of shift and rotates. */ >--- gcc/testsuite/gcc.c-torture/execute/pr93744-1.c.jj 2020-02-14 >22:50:58.993346192 +0100 >+++ gcc/testsuite/gcc.c-torture/execute/pr93744-1.c 2020-02-14 >22:49:57.934251395 +0100 >@@ -0,0 +1,14 @@ >+/* PR tree-optimization/93744 */ >+ >+typedef int I; >+ >+int >+main () >+{ >+ int a = 0; >+ I b = 0; >+ (a > 0) * (b |= 2); >+ if (b != 2) >+__builtin_abort (); >+ return 0; >+} >--- gcc/testsuite/gcc.c-torture/execute/pr93744-2.c.jj 2020-02-14 >22:51:01.100314955 +0100 >+++ gcc/testsuite/gcc.c-torture/execute/pr93744-2.c 2020-02-14 >22:50:18.299949478 +0100 >@@ -0,0 +1,21 @@ >+/* PR tree-optimization/93744 */ >+ >+int w; >+ >+int >+foo (int x, int y, int z) >+{ >+ int r = z - ((z - w++) & -(x < y)); >+ return r; >+} >+ >+int >+main () >+{ >+ w = 4; >+ if (foo (5, 7, 12) != 4 || w != 5) >+__builtin_abort (); >+ if (foo (7, 5, 12) != 12 || w != 6) >+__builtin_abort (); >+ return 0; >+} >--- gcc/testsuite/gcc.c-torture/execute/pr93744-3.c.jj 2020-02-14 >22:51:03.415280636 +0100 >+++ gcc/testsuite/gcc.c-torture/execute/pr93744-3.c 2020-02-14 >22:50:25.820837971 +0100 >@@ -0,0 +1,21 @@ >+/* PR tree-optimization/93744 */ >+ >+int w; >+ >+int >+foo (int x, int y, int z) >+{ >+ int r = z + 
((w++ - z) & -(x < y)); >+ return r; >+} >+ >+int >+main () >+{ >+ w = 4; >+ if (foo (5, 7, 12) != 4 || w != 5) >+__builtin_abort (); >+ if (foo (7, 5, 12) != 12 || w != 6) >+__builtin_abort (); >+ return 0; >+} > > Jakub
Re: [committed] c++: Fix constexpr if and braced functional cast.
On 2/13/20 12:42 AM, Jason Merrill wrote: While partially instantiating a generic lambda, we can encounter pack expansions or constexpr if where we can't actually do the substitution immediately, and instead remember a partial instantiation context in *_EXTRA_ARGS. This includes any local_specializations used in the pattern or condition. In this testcase our tree walk wasn't finding the use of i because we weren't walking into the type of a CONSTRUCTOR. Fixed by moving the code for doing that from find_parameter_packs_r into cp_walk_subtrees. Tested x86_64-pc-linux-gnu, applying to trunk. 2020-02-11 Jason Merrill PR c++/92583 PR c++/92654 * tree.c (cp_walk_subtrees): Walk CONSTRUCTOR types here. * pt.c (find_parameter_packs_r): Not here. Another place that is redundant with the code in cp_walk_subtrees: commit ce23347267daba090f619d61c6a7a749ea5dbeab Author: Jason Merrill Date: Fri Feb 14 11:37:26 2020 +0100 c++: Remove more dead code. gcc/cp/ChangeLog 2020-02-14 Jason Merrill PR c++/92583 * pt.c (any_template_parm_r): Remove CONSTRUCTOR handling. diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 6e7f4555da8..d19bde7bcbe 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -10448,11 +10448,6 @@ any_template_parm_r (tree t, void *data) WALK_SUBTREE (TREE_OPERAND (t, 1)); break; -case CONSTRUCTOR: - if (TREE_TYPE (t)) -WALK_SUBTREE (TREE_TYPE (t)); - break; - case PARM_DECL: /* A parameter or constraint variable may also depend on a template parameter without explicitly naming it. */
[committed] c++: Fix lambda in atomic constraint [PR92556]
find_template_parameters needs to find the mention of T in the lambda. Fixing that leaves this as a hard error, which may be surprising but is consistent with lambdas in other SFINAE contexts like template argument deduction. Tested x86_64-pc-linux-gnu, applying to trunk. gcc/cp/ChangeLog 2020-02-15 Jason Merrill PR c++/92556 * pt.c (any_template_parm_r): Look into lambda body. --- gcc/cp/pt.c | 9 +++++++++ gcc/testsuite/g++.dg/cpp2a/concepts-lambda5.C | 10 ++++++++++ 2 files changed, 19 insertions(+) create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-lambda5.C diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index d19bde7bcbe..6c9abb8f3d3 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -10479,6 +10479,15 @@ any_template_parm_r (tree t, void *data) } break; +case LAMBDA_EXPR: + { + /* Look in the parms and body. */ + tree fn = lambda_function (t); + WALK_SUBTREE (TREE_TYPE (fn)); + WALK_SUBTREE (DECL_SAVED_TREE (fn)); + } + break; + default: break; } diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-lambda5.C b/gcc/testsuite/g++.dg/cpp2a/concepts-lambda5.C new file mode 100644 index 00000000000..fe471899c14 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/concepts-lambda5.C @@ -0,0 +1,10 @@ +// PR c++/92556 +// { dg-do compile { target c++2a } } + +// Having this as a hard error is consistent with template argument deduction; +// it's an open core issue (jason 2020-02-14). +template <class T> concept has_value + = requires { []{T::value;}; }; // { dg-error "" } +template <has_value T> void f() { } +template <class T> void f() { } +void q() { f<int>(); } base-commit: d71365427670a791c5b54bfec6e3d41210844a8a -- 2.18.1
[PATCH 02/10] i386: Use ix86_output_ssemov for XImode TYPE_SSEMOV
PR target/89229 * config/i386/i386.md (*movxi_internal_avx512f): Call ix86_output_ssemov for TYPE_SSEMOV. --- gcc/config/i386/i386.md | 6 +- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index f14683cd14f..b30e5a51edc 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1902,11 +1902,7 @@ (define_insn "*movxi_internal_avx512f" return standard_sse_constant_opcode (insn, operands); case TYPE_SSEMOV: - if (misaligned_operand (operands[0], XImode) - || misaligned_operand (operands[1], XImode)) - return "vmovdqu32\t{%1, %0|%0, %1}"; - else - return "vmovdqa32\t{%1, %0|%0, %1}"; + return ix86_output_ssemov (insn, operands); default: gcc_unreachable (); -- 2.24.1
[PATCH 06/10] i386: Use ix86_output_ssemov for SImode TYPE_SSEMOV
There is no need to set mode attribute to XImode since ix86_output_ssemov can properly encode xmm16-xmm31 registers with and without AVX512VL. gcc/ PR target/89229 * config/i386/i386.md (*movsi_internal): Call ix86_output_ssemov for TYPE_SSEMOV. Remove ext_sse_reg_operand and TARGET_AVX512VL check. gcc/testsuite/ PR target/89229 * gcc.target/i386/pr89229-4a.c: New test. * gcc.target/i386/pr89229-4b.c: Likewise. * gcc.target/i386/pr89229-4c.c: Likewise. --- gcc/config/i386/i386.md| 25 ++ gcc/testsuite/gcc.target/i386/pr89229-4a.c | 17 +++ gcc/testsuite/gcc.target/i386/pr89229-4b.c | 6 ++ gcc/testsuite/gcc.target/i386/pr89229-4c.c | 7 ++ 4 files changed, 32 insertions(+), 23 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-4a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-4b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-4c.c diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 03d8078e957..05815c5cf3b 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2261,25 +2261,7 @@ (define_insn "*movsi_internal" gcc_unreachable (); case TYPE_SSEMOV: - switch (get_attr_mode (insn)) - { - case MODE_SI: - return "%vmovd\t{%1, %0|%0, %1}"; - case MODE_TI: - return "%vmovdqa\t{%1, %0|%0, %1}"; - case MODE_XI: - return "vmovdqa32\t{%g1, %g0|%g0, %g1}"; - - case MODE_V4SF: - return "%vmovaps\t{%1, %0|%0, %1}"; - - case MODE_SF: - gcc_assert (!TARGET_AVX); - return "movss\t{%1, %0|%0, %1}"; - - default: - gcc_unreachable (); - } + return ix86_output_ssemov (insn, operands); case TYPE_MMX: return "pxor\t%0, %0"; @@ -2345,10 +2327,7 @@ (define_insn "*movsi_internal" (cond [(eq_attr "alternative" "2,3") (const_string "DI") (eq_attr "alternative" "8,9") - (cond [(ior (match_operand 0 "ext_sse_reg_operand") - (match_operand 1 "ext_sse_reg_operand")) - (const_string "XI") -(match_test "TARGET_AVX") + (cond [(match_test "TARGET_AVX") (const_string "TI") (ior (not (match_test "TARGET_SSE2")) (match_test 
"optimize_function_for_size_p (cfun)")) diff --git a/gcc/testsuite/gcc.target/i386/pr89229-4a.c b/gcc/testsuite/gcc.target/i386/pr89229-4a.c new file mode 100644 index 000..fd56f447016 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89229-4a.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -march=skylake-avx512" } */ + +extern int i; + +int +foo1 (void) +{ + register int xmm16 __asm ("xmm16") = i; + asm volatile ("" : "+v" (xmm16)); + register int xmm17 __asm ("xmm17") = xmm16; + asm volatile ("" : "+v" (xmm17)); + return xmm17; +} + +/* { dg-final { scan-assembler-times "vmovdqa32\[^\n\r]*xmm1\[67]\[^\n\r]*xmm1\[67]" 1 } } */ +/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr89229-4b.c b/gcc/testsuite/gcc.target/i386/pr89229-4b.c new file mode 100644 index 000..023e81253a0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89229-4b.c @@ -0,0 +1,6 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */ + +#include "pr89229-4a.c" + +/* { dg-final { scan-assembler-times "vmovdqa32\[^\n\r]*zmm1\[67]\[^\n\r]*zmm1\[67]" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr89229-4c.c b/gcc/testsuite/gcc.target/i386/pr89229-4c.c new file mode 100644 index 000..bb728082e96 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89229-4c.c @@ -0,0 +1,7 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */ + +#include "pr89229-4a.c" + +/* { dg-final { scan-assembler-times "vmovdqa32\[^\n\r]*xmm1\[67]\[^\n\r]*xmm1\[67]" 1 } } */ +/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */ -- 2.24.1
[PATCH 03/10] i386: Use ix86_output_ssemov for OImode TYPE_SSEMOV
There is no need to set mode attribute to XImode since ix86_output_ssemov can properly encode ymm16-ymm31 registers with and without AVX512VL. PR target/89229 * config/i386/i386.md (*movoi_internal_avx): Call ix86_output_ssemov for TYPE_SSEMOV. Remove ext_sse_reg_operand and TARGET_AVX512VL check. --- gcc/config/i386/i386.md | 26 ++ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index b30e5a51edc..9e9b17d0913 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1925,21 +1925,7 @@ (define_insn "*movoi_internal_avx" return standard_sse_constant_opcode (insn, operands); case TYPE_SSEMOV: - if (misaligned_operand (operands[0], OImode) - || misaligned_operand (operands[1], OImode)) - { - if (get_attr_mode (insn) == MODE_XI) - return "vmovdqu32\t{%1, %0|%0, %1}"; - else - return "vmovdqu\t{%1, %0|%0, %1}"; - } - else - { - if (get_attr_mode (insn) == MODE_XI) - return "vmovdqa32\t{%1, %0|%0, %1}"; - else - return "vmovdqa\t{%1, %0|%0, %1}"; - } + return ix86_output_ssemov (insn, operands); default: gcc_unreachable (); @@ -1948,15 +1934,7 @@ (define_insn "*movoi_internal_avx" [(set_attr "isa" "*,avx2,*,*") (set_attr "type" "sselog1,sselog1,ssemov,ssemov") (set_attr "prefix" "vex") - (set (attr "mode") - (cond [(ior (match_operand 0 "ext_sse_reg_operand") - (match_operand 1 "ext_sse_reg_operand")) -(const_string "XI") - (and (eq_attr "alternative" "1") - (match_test "TARGET_AVX512VL")) -(const_string "XI") - ] - (const_string "OI")))]) + (set_attr "mode" "OI")]) (define_insn "*movti_internal" [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,v ,m,?r,?Yd") -- 2.24.1
[PATCH 00/10] i386: Properly encode xmm16-xmm31/ymm16-ymm31 for vector move
This patch set was originally submitted in Feb 2019: https://gcc.gnu.org/ml/gcc-patches/2019-02/msg01841.html I broke it into 10 smaller patches for easy review. On x86, when AVX and AVX512 are enabled, vector move instructions can be encoded with either 2-byte/3-byte VEX (AVX) or 4-byte EVEX (AVX512): 0: c5 f9 6f d1 vmovdqa %xmm1,%xmm2 4: 62 f1 fd 08 6f d1 vmovdqa64 %xmm1,%xmm2 We prefer VEX encoding over EVEX since VEX is shorter. Also AVX512F only supports 512-bit vector moves. AVX512F + AVX512VL supports 128-bit and 256-bit vector moves. Mode attributes on x86 vector move patterns indicate target preferences of vector move encoding. For vector register to vector register move, we can use 512-bit vector move instructions to move 128-bit/256-bit vector if AVX512VL isn't available. With AVX512F and AVX512VL, we should use VEX encoding for 128-bit/256-bit vector moves if upper 16 vector registers aren't used. This patch adds a function, ix86_output_ssemov, to generate vector moves: 1. If zmm registers are used, use EVEX encoding. 2. If xmm16-xmm31/ymm16-ymm31 registers aren't used, SSE or VEX encoding will be generated. 3. If xmm16-xmm31/ymm16-ymm31 registers are used: a. With AVX512VL, AVX512VL vector moves will be generated. b. Without AVX512VL, xmm16-xmm31/ymm16-ymm31 register to register move will be done with zmm register move. Tested on AVX2 and AVX512 with and without --with-arch=native. H.J. 
Lu (10): i386: Properly encode vector registers in vector move i386: Use ix86_output_ssemov for XImode TYPE_SSEMOV i386: Use ix86_output_ssemov for OImode TYPE_SSEMOV i386: Use ix86_output_ssemov for TImode TYPE_SSEMOV i386: Use ix86_output_ssemov for DImode TYPE_SSEMOV i386: Use ix86_output_ssemov for SImode TYPE_SSEMOV i386: Use ix86_output_ssemov for TFmode TYPE_SSEMOV i386: Use ix86_output_ssemov for DFmode TYPE_SSEMOV i386: Use ix86_output_ssemov for SFmode TYPE_SSEMOV i386: Use ix86_output_ssemov for MMX TYPE_SSEMOV gcc/config/i386/i386-protos.h | 2 + gcc/config/i386/i386.c| 274 ++ gcc/config/i386/i386.md | 212 +- gcc/config/i386/mmx.md| 29 +- gcc/config/i386/predicates.md | 5 - gcc/config/i386/sse.md| 98 +-- .../gcc.target/i386/avx512vl-vmovdqa64-1.c| 7 +- gcc/testsuite/gcc.target/i386/pr89229-2a.c| 15 + gcc/testsuite/gcc.target/i386/pr89229-2b.c| 13 + gcc/testsuite/gcc.target/i386/pr89229-2c.c| 6 + gcc/testsuite/gcc.target/i386/pr89229-3a.c| 17 ++ gcc/testsuite/gcc.target/i386/pr89229-3b.c| 6 + gcc/testsuite/gcc.target/i386/pr89229-3c.c| 7 + gcc/testsuite/gcc.target/i386/pr89229-4a.c| 17 ++ gcc/testsuite/gcc.target/i386/pr89229-4b.c| 6 + gcc/testsuite/gcc.target/i386/pr89229-4c.c| 7 + gcc/testsuite/gcc.target/i386/pr89229-5a.c| 16 + gcc/testsuite/gcc.target/i386/pr89229-5b.c| 12 + gcc/testsuite/gcc.target/i386/pr89229-5c.c| 6 + gcc/testsuite/gcc.target/i386/pr89229-6a.c| 16 + gcc/testsuite/gcc.target/i386/pr89229-6b.c| 7 + gcc/testsuite/gcc.target/i386/pr89229-6c.c| 6 + gcc/testsuite/gcc.target/i386/pr89229-7a.c| 16 + gcc/testsuite/gcc.target/i386/pr89229-7b.c| 6 + gcc/testsuite/gcc.target/i386/pr89229-7c.c| 6 + gcc/testsuite/gcc.target/i386/pr89346.c | 15 + 26 files changed, 497 insertions(+), 330 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-2a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-2b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-2c.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-3a.c 
create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-3b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-3c.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-4a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-4b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-4c.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-5a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-5b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-5c.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-6a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-6b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-6c.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-7a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-7b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-7c.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89346.c -- 2.24.1
[PATCH 04/10] i386: Use ix86_output_ssemov for TImode TYPE_SSEMOV
There is no need to set mode attribute to XImode since ix86_output_ssemov can properly encode xmm16-xmm31 registers with and without AVX512VL. gcc/ PR target/89229 * config/i386/i386.md (*movti_internal): Call ix86_output_ssemov for TYPE_SSEMOV. Remove ext_sse_reg_operand and TARGET_AVX512VL check. gcc/testsuite/ PR target/89229 * gcc.target/i386/pr89229-2a.c: New test. * gcc.target/i386/pr89229-2b.c: Likewise. * gcc.target/i386/pr89229-2c.c: Likewise. --- gcc/config/i386/i386.md| 28 +- gcc/testsuite/gcc.target/i386/pr89229-2a.c | 15 gcc/testsuite/gcc.target/i386/pr89229-2b.c | 13 ++ gcc/testsuite/gcc.target/i386/pr89229-2c.c | 6 + 4 files changed, 35 insertions(+), 27 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-2a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-2b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-2c.c diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 9e9b17d0913..5607d1ecddc 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1955,27 +1955,7 @@ (define_insn "*movti_internal" return standard_sse_constant_opcode (insn, operands); case TYPE_SSEMOV: - /* TDmode values are passed as TImode on the stack. Moving them -to stack may result in unaligned memory access. 
*/ - if (misaligned_operand (operands[0], TImode) - || misaligned_operand (operands[1], TImode)) - { - if (get_attr_mode (insn) == MODE_V4SF) - return "%vmovups\t{%1, %0|%0, %1}"; - else if (get_attr_mode (insn) == MODE_XI) - return "vmovdqu32\t{%1, %0|%0, %1}"; - else - return "%vmovdqu\t{%1, %0|%0, %1}"; - } - else - { - if (get_attr_mode (insn) == MODE_V4SF) - return "%vmovaps\t{%1, %0|%0, %1}"; - else if (get_attr_mode (insn) == MODE_XI) - return "vmovdqa32\t{%1, %0|%0, %1}"; - else - return "%vmovdqa\t{%1, %0|%0, %1}"; - } + return ix86_output_ssemov (insn, operands); default: gcc_unreachable (); @@ -2002,12 +1982,6 @@ (define_insn "*movti_internal" (set (attr "mode") (cond [(eq_attr "alternative" "0,1") (const_string "DI") - (ior (match_operand 0 "ext_sse_reg_operand") - (match_operand 1 "ext_sse_reg_operand")) -(const_string "XI") - (and (eq_attr "alternative" "3") - (match_test "TARGET_AVX512VL")) -(const_string "XI") (match_test "TARGET_AVX") (const_string "TI") (ior (not (match_test "TARGET_SSE2")) diff --git a/gcc/testsuite/gcc.target/i386/pr89229-2a.c b/gcc/testsuite/gcc.target/i386/pr89229-2a.c new file mode 100644 index 000..0cf78039481 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89229-2a.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -march=skylake-avx512" } */ + +typedef __int128 __m128t __attribute__ ((__vector_size__ (16), +__may_alias__)); + +__m128t +foo1 (void) +{ + register __int128 xmm16 __asm ("xmm16") = (__int128) -1; + asm volatile ("" : "+v" (xmm16)); + return (__m128t) xmm16; +} + +/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr89229-2b.c b/gcc/testsuite/gcc.target/i386/pr89229-2b.c new file mode 100644 index 000..8d5d6c41d30 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89229-2b.c @@ -0,0 +1,13 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */ + +typedef __int128 __m128t __attribute__ ((__vector_size__ (16), +__may_alias__)); + +__m128t +foo1 (void) +{ + register __int128 xmm16 __asm ("xmm16") = (__int128) -1; /* { dg-error "register specified for 'xmm16'" } */ + asm volatile ("" : "+v" (xmm16)); + return (__m128t) xmm16; +} diff --git a/gcc/testsuite/gcc.target/i386/pr89229-2c.c b/gcc/testsuite/gcc.target/i386/pr89229-2c.c new file mode 100644 index 000..218da46dcd0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89229-2c.c @@ -0,0 +1,6 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */ + +#include "pr89229-2a.c" + +/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */ -- 2.24.1
[PATCH 05/10] i386: Use ix86_output_ssemov for DImode TYPE_SSEMOV
There is no need to set mode attribute to XImode since ix86_output_ssemov can properly encode xmm16-xmm31 registers with and without AVX512VL. gcc/ PR target/89229 * config/i386/i386.md (*movdi_internal): Call ix86_output_ssemov for TYPE_SSEMOV. Remove ext_sse_reg_operand and TARGET_AVX512VL check. gcc/testsuite/ PR target/89229 * gcc.target/i386/pr89229-3a.c: New test. * gcc.target/i386/pr89229-3b.c: Likewise. * gcc.target/i386/pr89229-3c.c: Likewise. --- gcc/config/i386/i386.md| 31 ++ gcc/testsuite/gcc.target/i386/pr89229-3a.c | 17 gcc/testsuite/gcc.target/i386/pr89229-3b.c | 6 + gcc/testsuite/gcc.target/i386/pr89229-3c.c | 7 + 4 files changed, 32 insertions(+), 29 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-3a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-3b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-3c.c diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 5607d1ecddc..03d8078e957 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -2054,31 +2054,7 @@ (define_insn "*movdi_internal" return standard_sse_constant_opcode (insn, operands); case TYPE_SSEMOV: - switch (get_attr_mode (insn)) - { - case MODE_DI: - /* Handle broken assemblers that require movd instead of movq. */ - if (!HAVE_AS_IX86_INTERUNIT_MOVQ - && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1]))) - return "%vmovd\t{%1, %0|%0, %1}"; - return "%vmovq\t{%1, %0|%0, %1}"; - - case MODE_TI: - /* Handle AVX512 registers set. 
*/ - if (EXT_REX_SSE_REG_P (operands[0]) - || EXT_REX_SSE_REG_P (operands[1])) - return "vmovdqa64\t{%1, %0|%0, %1}"; - return "%vmovdqa\t{%1, %0|%0, %1}"; - - case MODE_V2SF: - gcc_assert (!TARGET_AVX); - return "movlps\t{%1, %0|%0, %1}"; - case MODE_V4SF: - return "%vmovaps\t{%1, %0|%0, %1}"; - - default: - gcc_unreachable (); - } + return ix86_output_ssemov (insn, operands); case TYPE_SSECVT: if (SSE_REG_P (operands[0])) @@ -2164,10 +2140,7 @@ (define_insn "*movdi_internal" (cond [(eq_attr "alternative" "2") (const_string "SI") (eq_attr "alternative" "12,13") - (cond [(ior (match_operand 0 "ext_sse_reg_operand") - (match_operand 1 "ext_sse_reg_operand")) - (const_string "TI") -(match_test "TARGET_AVX") + (cond [(match_test "TARGET_AVX") (const_string "TI") (ior (not (match_test "TARGET_SSE2")) (match_test "optimize_function_for_size_p (cfun)")) diff --git a/gcc/testsuite/gcc.target/i386/pr89229-3a.c b/gcc/testsuite/gcc.target/i386/pr89229-3a.c new file mode 100644 index 000..cb9b071e873 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89229-3a.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */ + +extern long long i; + +long long +foo1 (void) +{ + register long long xmm16 __asm ("xmm16") = i; + asm volatile ("" : "+v" (xmm16)); + register long long xmm17 __asm ("xmm17") = xmm16; + asm volatile ("" : "+v" (xmm17)); + return xmm17; +} + +/* { dg-final { scan-assembler-times "vmovdqa64\[^\n\r]*xmm1\[67]\[^\n\r]*xmm1\[67]" 1 } } */ +/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr89229-3b.c b/gcc/testsuite/gcc.target/i386/pr89229-3b.c new file mode 100644 index 000..9265fc0354b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89229-3b.c @@ -0,0 +1,6 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */ + +#include "pr89229-3a.c" + +/* { dg-final { scan-assembler-times "vmovdqa32\[^\n\r]*zmm1\[67]\[^\n\r]*zmm1\[67]" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr89229-3c.c b/gcc/testsuite/gcc.target/i386/pr89229-3c.c new file mode 100644 index 000..be0ca78a37e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89229-3c.c @@ -0,0 +1,7 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */ + +#include "pr89229-3a.c" + +/* { dg-final { scan-assembler-times "vmovdqa64\[^\n\r]*xmm1\[67]\[^\n\r]*xmm1\[67]" 1 } } */ +/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */ -- 2.24.1
[PATCH 01/10] i386: Properly encode vector registers in vector move
On x86, when AVX and AVX512 are enabled, vector move instructions can be encoded with either 2-byte/3-byte VEX (AVX) or 4-byte EVEX (AVX512): 0: c5 f9 6f d1 vmovdqa %xmm1,%xmm2 4: 62 f1 fd 08 6f d1 vmovdqa64 %xmm1,%xmm2 We prefer VEX encoding over EVEX since VEX is shorter. Also AVX512F only supports 512-bit vector moves. AVX512F + AVX512VL supports 128-bit and 256-bit vector moves. Mode attributes on x86 vector move patterns indicate target preferences of vector move encoding. For vector register to vector register move, we can use 512-bit vector move instructions to move 128-bit/256-bit vector if AVX512VL isn't available. With AVX512F and AVX512VL, we should use VEX encoding for 128-bit/256-bit vector moves if upper 16 vector registers aren't used. This patch adds a function, ix86_output_ssemov, to generate vector moves: 1. If zmm registers are used, use EVEX encoding. 2. If xmm16-xmm31/ymm16-ymm31 registers aren't used, SSE or VEX encoding will be generated. 3. If xmm16-xmm31/ymm16-ymm31 registers are used: a. With AVX512VL, AVX512VL vector moves will be generated. b. Without AVX512VL, xmm16-xmm31/ymm16-ymm31 register to register move will be done with zmm register move. Tested on AVX2 and AVX512 with and without --with-arch=native. gcc/ PR target/89229 PR target/89346 * config/i386/i386-protos.h (ix86_output_ssemov): New prototype. * config/i386/i386.c (ix86_get_ssemov): New function. (ix86_output_ssemov): Likewise. * config/i386/sse.md (VMOVE:mov_internal): Call ix86_output_ssemov for TYPE_SSEMOV. Remove TARGET_AVX512VL check. gcc/testsuite/ PR target/89229 PR target/89346 * gcc.target/i386/avx512vl-vmovdqa64-1.c: Updated. * gcc.target/i386/pr89229-2a.c: New test. 
--- gcc/config/i386/i386-protos.h | 2 + gcc/config/i386/i386.c| 274 ++ gcc/config/i386/sse.md| 98 +-- .../gcc.target/i386/avx512vl-vmovdqa64-1.c| 7 +- gcc/testsuite/gcc.target/i386/pr89346.c | 15 + 5 files changed, 296 insertions(+), 100 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr89346.c diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 266381ca5a6..39fcaa0ad5f 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -38,6 +38,8 @@ extern void ix86_expand_split_stack_prologue (void); extern void ix86_output_addr_vec_elt (FILE *, int); extern void ix86_output_addr_diff_elt (FILE *, int, int); +extern const char *ix86_output_ssemov (rtx_insn *, rtx *); + extern enum calling_abi ix86_cfun_abi (void); extern enum calling_abi ix86_function_type_abi (const_tree); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index dac7a3fc5fd..26f8c9494b9 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -4915,6 +4915,280 @@ ix86_pre_reload_split (void) && !(cfun->curr_properties & PROP_rtl_split_insns)); } +/* Return the opcode of the TYPE_SSEMOV instruction. To move from + or to xmm16-xmm31/ymm16-ymm31 registers, we either require + TARGET_AVX512VL or it is a register to register move which can + be done with zmm register move. 
*/ + +static const char * +ix86_get_ssemov (rtx *operands, unsigned size, +enum attr_mode insn_mode, machine_mode mode) +{ + char buf[128]; + bool misaligned_p = (misaligned_operand (operands[0], mode) + || misaligned_operand (operands[1], mode)); + bool evex_reg_p = (EXT_REX_SSE_REG_P (operands[0]) +|| EXT_REX_SSE_REG_P (operands[1])); + machine_mode scalar_mode; + + const char *opcode = NULL; + enum +{ + opcode_int, + opcode_float, + opcode_double +} type = opcode_int; + + switch (insn_mode) +{ +case MODE_V16SF: +case MODE_V8SF: +case MODE_V4SF: + scalar_mode = E_SFmode; + break; +case MODE_V8DF: +case MODE_V4DF: +case MODE_V2DF: + scalar_mode = E_DFmode; + break; +case MODE_XI: +case MODE_OI: +case MODE_TI: + scalar_mode = GET_MODE_INNER (mode); + break; +default: + gcc_unreachable (); +} + + if (SCALAR_FLOAT_MODE_P (scalar_mode)) +{ + switch (scalar_mode) + { + case E_SFmode: + if (size == 64 || !evex_reg_p || TARGET_AVX512VL) + opcode = misaligned_p ? "%vmovups" : "%vmovaps"; + else + type = opcode_float; + break; + case E_DFmode: + if (size == 64 || !evex_reg_p || TARGET_AVX512VL) + opcode = misaligned_p ? "%vmovupd" : "%vmovapd"; + else + type = opcode_double; + break; + case E_TFmode: + if (size == 64) + opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64"; + else if (evex_reg_p) + { + if (TARGET_AVX512VL) +
[PATCH 08/10] i386: Use ix86_output_ssemov for DFmode TYPE_SSEMOV
There is no need to set mode attribute to XImode nor V8DFmode since ix86_output_ssemov can properly encode xmm16-xmm31 registers with and without AVX512VL. gcc/ PR target/89229 * config/i386/i386.md (*movdf_internal): Call ix86_output_ssemov for TYPE_SSEMOV. Remove TARGET_AVX512F, TARGET_PREFER_AVX256, TARGET_AVX512VL and ext_sse_reg_operand check. gcc/testsuite/ PR target/89229 * gcc.target/i386/pr89229-6a.c: New test. * gcc.target/i386/pr89229-6b.c: Likewise. * gcc.target/i386/pr89229-6c.c: Likewise. --- gcc/config/i386/i386.md| 44 ++ gcc/testsuite/gcc.target/i386/pr89229-6a.c | 16 gcc/testsuite/gcc.target/i386/pr89229-6b.c | 7 gcc/testsuite/gcc.target/i386/pr89229-6c.c | 6 +++ 4 files changed, 32 insertions(+), 41 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-6a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-6b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-6c.c diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index fdf0e5a8802..01892992adb 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -3307,37 +3307,7 @@ (define_insn "*movdf_internal" return standard_sse_constant_opcode (insn, operands); case TYPE_SSEMOV: - switch (get_attr_mode (insn)) - { - case MODE_DF: - if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1])) - return "vmovsd\t{%d1, %0|%0, %d1}"; - return "%vmovsd\t{%1, %0|%0, %1}"; - - case MODE_V4SF: - return "%vmovaps\t{%1, %0|%0, %1}"; - case MODE_V8DF: - return "vmovapd\t{%g1, %g0|%g0, %g1}"; - case MODE_V2DF: - return "%vmovapd\t{%1, %0|%0, %1}"; - - case MODE_V2SF: - gcc_assert (!TARGET_AVX); - return "movlps\t{%1, %0|%0, %1}"; - case MODE_V1DF: - gcc_assert (!TARGET_AVX); - return "movlpd\t{%1, %0|%0, %1}"; - - case MODE_DI: - /* Handle broken assemblers that require movd instead of movq. 
*/ - if (!HAVE_AS_IX86_INTERUNIT_MOVQ - && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1]))) - return "%vmovd\t{%1, %0|%0, %1}"; - return "%vmovq\t{%1, %0|%0, %1}"; - - default: - gcc_unreachable (); - } + return ix86_output_ssemov (insn, operands); default: gcc_unreachable (); @@ -3391,10 +3361,7 @@ (define_insn "*movdf_internal" /* xorps is one byte shorter for non-AVX targets. */ (eq_attr "alternative" "12,16") -(cond [(and (match_test "TARGET_AVX512F") -(not (match_test "TARGET_PREFER_AVX256"))) - (const_string "XI") - (match_test "TARGET_AVX") +(cond [(match_test "TARGET_AVX") (const_string "V2DF") (ior (not (match_test "TARGET_SSE2")) (match_test "optimize_function_for_size_p (cfun)")) @@ -3410,12 +3377,7 @@ (define_insn "*movdf_internal" /* movaps is one byte shorter for non-AVX targets. */ (eq_attr "alternative" "13,17") -(cond [(and (ior (not (match_test "TARGET_PREFER_AVX256")) - (not (match_test "TARGET_AVX512VL"))) -(ior (match_operand 0 "ext_sse_reg_operand") - (match_operand 1 "ext_sse_reg_operand"))) - (const_string "V8DF") - (match_test "TARGET_AVX") +(cond [(match_test "TARGET_AVX") (const_string "DF") (ior (not (match_test "TARGET_SSE2")) (match_test "optimize_function_for_size_p (cfun)")) diff --git a/gcc/testsuite/gcc.target/i386/pr89229-6a.c b/gcc/testsuite/gcc.target/i386/pr89229-6a.c new file mode 100644 index 000..5bc10d25619 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89229-6a.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -march=skylake-avx512" } */ + +extern double d; + +void +foo1 (double x) +{ + register double xmm16 __asm ("xmm16") = x; + asm volatile ("" : "+v" (xmm16)); + register double xmm17 __asm ("xmm17") = xmm16; + asm volatile ("" : "+v" (xmm17)); + d = xmm17; +} + +/* { dg-final { scan-assembler-not "vmovapd" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr89229-6b.c b/gcc/testsuite/gcc.target/i386/pr89229-6b.c new file mode 100644 index 000..b248a3726f4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89229-6b.c @@ -0,0 +1,7 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */ + +#include "pr89229-6a.c" + +/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */ +/* { dg-fina
[PATCH 09/10] i386: Use ix86_output_ssemov for SFmode TYPE_SSEMOV
There is no need to set mode attribute to V16SFmode since ix86_output_ssemov can properly encode xmm16-xmm31 registers with and without AVX512VL. gcc/ PR target/89229 * config/i386/i386.md (*movsf_internal): Call ix86_output_ssemov for TYPE_SSEMOV. Remove TARGET_PREFER_AVX256, TARGET_AVX512VL and ext_sse_reg_operand check. gcc/testsuite/ PR target/89229 * gcc.target/i386/pr89229-7a.c: New test. * gcc.target/i386/pr89229-7b.c: Likewise. * gcc.target/i386/pr89229-7c.c: Likewise. --- gcc/config/i386/i386.md| 26 ++ gcc/testsuite/gcc.target/i386/pr89229-7a.c | 16 + gcc/testsuite/gcc.target/i386/pr89229-7b.c | 6 + gcc/testsuite/gcc.target/i386/pr89229-7c.c | 6 + 4 files changed, 30 insertions(+), 24 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-7a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-7b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-7c.c diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 01892992adb..2dcf2d598c3 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -3469,24 +3469,7 @@ (define_insn "*movsf_internal" return standard_sse_constant_opcode (insn, operands); case TYPE_SSEMOV: - switch (get_attr_mode (insn)) - { - case MODE_SF: - if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1])) - return "vmovss\t{%d1, %0|%0, %d1}"; - return "%vmovss\t{%1, %0|%0, %1}"; - - case MODE_V16SF: - return "vmovaps\t{%g1, %g0|%g0, %g1}"; - case MODE_V4SF: - return "%vmovaps\t{%1, %0|%0, %1}"; - - case MODE_SI: - return "%vmovd\t{%1, %0|%0, %1}"; - - default: - gcc_unreachable (); - } + return ix86_output_ssemov (insn, operands); case TYPE_MMXMOV: switch (get_attr_mode (insn)) @@ -3558,12 +3541,7 @@ (define_insn "*movsf_internal" better to maintain the whole registers in single format to avoid problems on using packed logical operations. 
*/ (eq_attr "alternative" "6") -(cond [(and (ior (not (match_test "TARGET_PREFER_AVX256")) - (not (match_test "TARGET_AVX512VL"))) -(ior (match_operand 0 "ext_sse_reg_operand") - (match_operand 1 "ext_sse_reg_operand"))) - (const_string "V16SF") - (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY") +(cond [(ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY") (match_test "TARGET_SSE_SPLIT_REGS")) (const_string "V4SF") ] diff --git a/gcc/testsuite/gcc.target/i386/pr89229-7a.c b/gcc/testsuite/gcc.target/i386/pr89229-7a.c new file mode 100644 index 000..856115b2f5a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89229-7a.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -march=skylake-avx512" } */ + +extern float d; + +void +foo1 (float x) +{ + register float xmm16 __asm ("xmm16") = x; + asm volatile ("" : "+v" (xmm16)); + register float xmm17 __asm ("xmm17") = xmm16; + asm volatile ("" : "+v" (xmm17)); + d = xmm17; +} + +/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr89229-7b.c b/gcc/testsuite/gcc.target/i386/pr89229-7b.c new file mode 100644 index 000..93d1e43770c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89229-7b.c @@ -0,0 +1,6 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */ + +#include "pr89229-7a.c" + +/* { dg-final { scan-assembler-times "vmovaps\[^\n\r]*zmm1\[67]\[^\n\r]*zmm1\[67]" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr89229-7c.c b/gcc/testsuite/gcc.target/i386/pr89229-7c.c new file mode 100644 index 000..e37ff2bf5bd --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89229-7c.c @@ -0,0 +1,6 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */ + +#include "pr89229-7a.c" + +/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */ -- 2.24.1
[PATCH 10/10] i386: Use ix86_output_ssemov for MMX TYPE_SSEMOV
There is no need to set mode attribute to XImode since ix86_output_ssemov can properly encode xmm16-xmm31 registers with and without AVX512VL. Remove ext_sse_reg_operand since it is no longer needed. PR target/89229 * config/i386/mmx.md (MMXMODE:*mov_internal): Call ix86_output_ssemov for TYPE_SSEMOV. Remove ext_sse_reg_operand check. * config/i386/predicates.md (ext_sse_reg_operand): Removed. --- gcc/config/i386/mmx.md| 29 ++--- gcc/config/i386/predicates.md | 5 - 2 files changed, 2 insertions(+), 32 deletions(-) diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index f695831b5b9..7d9db5d352c 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -118,29 +118,7 @@ (define_insn "*mov_internal" return standard_sse_constant_opcode (insn, operands); case TYPE_SSEMOV: - switch (get_attr_mode (insn)) - { - case MODE_DI: - /* Handle broken assemblers that require movd instead of movq. */ - if (!HAVE_AS_IX86_INTERUNIT_MOVQ - && (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1]))) - return "%vmovd\t{%1, %0|%0, %1}"; - return "%vmovq\t{%1, %0|%0, %1}"; - case MODE_TI: - return "%vmovdqa\t{%1, %0|%0, %1}"; - case MODE_XI: - return "vmovdqa64\t{%g1, %g0|%g0, %g1}"; - - case MODE_V2SF: - if (TARGET_AVX && REG_P (operands[0])) - return "vmovlps\t{%1, %0, %0|%0, %0, %1}"; - return "%vmovlps\t{%1, %0|%0, %1}"; - case MODE_V4SF: - return "%vmovaps\t{%1, %0|%0, %1}"; - - default: - gcc_unreachable (); - } + return ix86_output_ssemov (insn, operands); default: gcc_unreachable (); @@ -189,10 +167,7 @@ (define_insn "*mov_internal" (cond [(eq_attr "alternative" "2") (const_string "SI") (eq_attr "alternative" "11,12") - (cond [(ior (match_operand 0 "ext_sse_reg_operand") - (match_operand 1 "ext_sse_reg_operand")) - (const_string "XI") -(match_test "mode == V2SFmode") + (cond [(match_test "mode == V2SFmode") (const_string "V4SF") (ior (not (match_test "TARGET_SSE2")) (match_test "optimize_function_for_size_p (cfun)")) diff --git 
a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 1119366d54e..71f4cb1193c 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -61,11 +61,6 @@ (define_predicate "sse_reg_operand" (and (match_code "reg") (match_test "SSE_REGNO_P (REGNO (op))"))) -;; True if the operand is an AVX-512 new register. -(define_predicate "ext_sse_reg_operand" - (and (match_code "reg") - (match_test "EXT_REX_SSE_REGNO_P (REGNO (op))"))) - ;; Return true if op is a QImode register. (define_predicate "any_QIreg_operand" (and (match_code "reg") -- 2.24.1
[PATCH 07/10] i386: Use ix86_output_ssemov for TFmode TYPE_SSEMOV
gcc/ PR target/89229 * config/i386/i386.md (*movtf_internal): Call ix86_output_ssemov for TYPE_SSEMOV. gcc/testsuite/ PR target/89229 * gcc.target/i386/pr89229-5a.c: New test. * gcc.target/i386/pr89229-5b.c: Likewise. * gcc.target/i386/pr89229-5c.c: Likewise. --- gcc/config/i386/i386.md| 26 +- gcc/testsuite/gcc.target/i386/pr89229-5a.c | 16 + gcc/testsuite/gcc.target/i386/pr89229-5b.c | 12 ++ gcc/testsuite/gcc.target/i386/pr89229-5c.c | 6 + 4 files changed, 35 insertions(+), 25 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-5a.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-5b.c create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-5c.c diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 05815c5cf3b..fdf0e5a8802 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -3154,31 +3154,7 @@ (define_insn "*movtf_internal" return standard_sse_constant_opcode (insn, operands); case TYPE_SSEMOV: - /* Handle misaligned load/store since we - don't have movmisaligntf pattern. 
*/ - if (misaligned_operand (operands[0], TFmode) - || misaligned_operand (operands[1], TFmode)) - { - if (get_attr_mode (insn) == MODE_V4SF) - return "%vmovups\t{%1, %0|%0, %1}"; - else if (TARGET_AVX512VL - && (EXT_REX_SSE_REG_P (operands[0]) - || EXT_REX_SSE_REG_P (operands[1]))) - return "vmovdqu64\t{%1, %0|%0, %1}"; - else - return "%vmovdqu\t{%1, %0|%0, %1}"; - } - else - { - if (get_attr_mode (insn) == MODE_V4SF) - return "%vmovaps\t{%1, %0|%0, %1}"; - else if (TARGET_AVX512VL - && (EXT_REX_SSE_REG_P (operands[0]) - || EXT_REX_SSE_REG_P (operands[1]))) - return "vmovdqa64\t{%1, %0|%0, %1}"; - else - return "%vmovdqa\t{%1, %0|%0, %1}"; - } + return ix86_output_ssemov (insn, operands); case TYPE_MULTI: return "#"; diff --git a/gcc/testsuite/gcc.target/i386/pr89229-5a.c b/gcc/testsuite/gcc.target/i386/pr89229-5a.c new file mode 100644 index 000..fcb85c366b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89229-5a.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -march=skylake-avx512" } */ + +extern __float128 d; + +void +foo1 (__float128 x) +{ + register __float128 xmm16 __asm ("xmm16") = x; + asm volatile ("" : "+v" (xmm16)); + register __float128 xmm17 __asm ("xmm17") = xmm16; + asm volatile ("" : "+v" (xmm17)); + d = xmm17; +} + +/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr89229-5b.c b/gcc/testsuite/gcc.target/i386/pr89229-5b.c new file mode 100644 index 000..37eb83c783b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89229-5b.c @@ -0,0 +1,12 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */ + +extern __float128 d; + +void +foo1 (__float128 x) +{ + register __float128 xmm16 __asm ("xmm16") = x; /* { dg-error "register specified for 'xmm16'" } */ + asm volatile ("" : "+v" (xmm16)); + d = xmm16; +} diff --git a/gcc/testsuite/gcc.target/i386/pr89229-5c.c b/gcc/testsuite/gcc.target/i386/pr89229-5c.c new file mode 100644 index 000..529a520133c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr89229-5c.c @@ -0,0 +1,6 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */ + +#include "pr89229-5a.c" + +/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */ -- 2.24.1
Re: [PATCH 1/3] libstdc++: Fold some ranges algo subroutines into their only caller
On Sat, 15 Feb 2020, Jonathan Wakely wrote: > On 14/02/20 10:35 -0500, Patrick Palka wrote: > > These subroutines have only a single call site, so it might be best and > > simplest > > to eliminate them before we convert the algos into function objects. > > > > libstdc++-v3/ChangeLog: > > > > * include/bits/ranges_algo.h (ranges::__find_end): Fold into ... > > (ranges::find_end): ... here. > > (ranges::__lexicographical_compare): Fold into ... > > (ranges::lexicographical_compare): ... here. > > * include/bits/ranges_algobase.h (ranges::__equal): Fold into ... > > (ranges::equal): ... here. > > OK for master, but please note the two comments below. > > > > libstdc++-v3/include/bits/ranges_algo.h | 104 > > libstdc++-v3/include/bits/ranges_algobase.h | 33 +++ > > 2 files changed, 55 insertions(+), 82 deletions(-) > > > > diff --git a/libstdc++-v3/include/bits/ranges_algo.h > > b/libstdc++-v3/include/bits/ranges_algo.h > > index 84a02cabb80..6b6f4defdf5 100644 > > --- a/libstdc++-v3/include/bits/ranges_algo.h > > +++ b/libstdc++-v3/include/bits/ranges_algo.h > > @@ -513,40 +513,7 @@ namespace ranges > > std::move(__pred), std::move(__proj)); > > } > > > > - template _Sent1, > > - forward_iterator _Iter2, sentinel_for<_Iter2> _Sent2, > > - typename _Pred = ranges::equal_to, > > - typename _Proj1 = identity, typename _Proj2 = identity> > > -requires indirectly_comparable<_Iter1, _Iter2, _Pred, _Proj1, _Proj2> > > -constexpr subrange<_Iter1> > > -__find_end(_Iter1 __first1, _Sent1 __last1, > > - _Iter2 __first2, _Sent2 __last2, > > - _Pred __pred, _Proj1 __proj1, _Proj2 __proj2) > > -{ > > - auto __i = ranges::next(__first1, __last1); > > - if (__first2 == __last2) > > - return {__i, __i}; > > > > - auto __result_begin = __i; > > - auto __result_end = __i; > > - for (;;) > > - { > > - auto __new_range = ranges::search(__first1, __last1, > > - __first2, __last2, > > - __pred, __proj1, __proj2); > > - auto __new_result_begin = ranges::begin(__new_range); > > - auto 
__new_result_end = ranges::end(__new_range); > > - if (__new_result_begin == __last1) > > - return {__result_begin, __result_end}; > > - else > > - { > > - __result_begin = __new_result_begin; > > - __result_end = __new_result_end; > > - __first1 = __result_begin; > > - ++__first1; > > - } > > - } > > -} > > > > template _Sent1, > >forward_iterator _Iter2, sentinel_for<_Iter2> _Sent2, > > @@ -578,9 +545,31 @@ namespace ranges > > return {__result_first, __result_last}; > > } > > else > > - return ranges::__find_end(__first1, __last1, __first2, __last2, > > - std::move(__pred), > > - std::move(__proj1), std::move(__proj2)); > > + { > > + auto __i = ranges::next(__first1, __last1); > > + if (__first2 == __last2) > > + return {__i, __i}; > > + > > + auto __result_begin = __i; > > + auto __result_end = __i; > > + for (;;) > > + { > > + auto __new_range = ranges::search(__first1, __last1, > > + __first2, __last2, > > + __pred, __proj1, __proj2); > > + auto __new_result_begin = ranges::begin(__new_range); > > + auto __new_result_end = ranges::end(__new_range); > > + if (__new_result_begin == __last1) > > + return {__result_begin, __result_end}; > > + else > > + { > > + __result_begin = __new_result_begin; > > + __result_end = __new_result_end; > > + __first1 = __result_begin; > > + ++__first1; > > + } > > + } > > + } > > } > > > > template > @@ -2908,14 +2897,26 @@ namespace ranges > > > > template _Sent1, > >input_iterator _Iter2, sentinel_for<_Iter2> _Sent2, > > - typename _Proj1, typename _Proj2, > > + typename _Proj1 = identity, typename _Proj2 = identity, > >indirect_strict_weak_order, > > - projected<_Iter2, _Proj2>> _Comp> > > + projected<_Iter2, _Proj2>> > > +_Comp = ranges::less> > > constexpr bool > > -__lexicographical_compare(_Iter1 __first1, _Sent1 __last1, > > - _Iter2 __first2, _Sent2 __last2, > > - _Comp __comp, _Proj1 __proj1, _Proj2 __proj2) > > +lexicographical_compare(_Iter1 __first1, _Sent1 __last1, > > + _Iter2 __first2, _Sent2 __last2, > > + _Comp 
__comp = {}, > > + _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) > > { > > + if constexpr (__detail::__is_normal_i
[PATCH] libstdc++: Move code after an early exit constexpr if to under an else branch
This avoids instantiating dead code when the true branch of the constexpr if is taken. [ diffstat generated with -w to ignore noisy whitespace changes ] libstdc++-v3/ChangeLog: * include/bits/ranges_algo.h (__lexicographical_compare_fn::operator()): Move code after an early exit constexpr if to under an else branch. * include/bits/ranges_algobase.h (__equal_fn::operator()): Likewise. --- libstdc++-v3/include/bits/ranges_algo.h | 7 +-- libstdc++-v3/include/bits/ranges_algobase.h | 7 ++- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/libstdc++-v3/include/bits/ranges_algo.h b/libstdc++-v3/include/bits/ranges_algo.h index 7f8f0fb964b..ff1b40f6ace 100644 --- a/libstdc++-v3/include/bits/ranges_algo.h +++ b/libstdc++-v3/include/bits/ranges_algo.h @@ -3318,7 +3318,8 @@ namespace ranges std::__niter_base(std::move(__last2)), std::move(__comp), std::move(__proj1), std::move(__proj2)); - + else + { constexpr bool __sized_iters = (sized_sentinel_for<_Sent1, _Iter1> && sized_sentinel_for<_Sent2, _Iter2>); @@ -3342,7 +3343,8 @@ namespace ranges { if (const auto __len = std::min(__d1, __d2)) { - const auto __c = std::__memcmp(__first1, __first2, __len); + const auto __c + = std::__memcmp(__first1, __first2, __len); if constexpr (is_same_v<_Comp, ranges::less>) { if (__c < 0) @@ -3378,6 +3380,7 @@ namespace ranges } return __first1 == __last1 && __first2 != __last2; } + } template -&& sized_sentinel_for<_Sent2, _Iter2>); - if constexpr (__sized_iters) + else if constexpr (sized_sentinel_for<_Sent1, _Iter1> + && sized_sentinel_for<_Sent2, _Iter2>) { auto __d1 = ranges::distance(__first1, __last1); auto __d2 = ranges::distance(__first2, __last2); -- 2.25.0.232.gd8437c57fa
libgo patch committed: On 32-bit systems, limit default GOMAXPROCS to 32
This libgo patch limits the default value of GOMAXPROCS to 32 on 32-bit systems. Otherwise we can easily run out of stack space for threads. The user can still override by setting GOMAXPROCS. Bootstrapped and ran Go testsuite on x86_64-pc-linux-gnu. Committed to mainline. Ian a339c239a7ed8af25eb612ea4ceb5d975528b951 diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index 27f4ce342e5..9916b02c57f 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -c94637ad6fd38d4814fb02d094a1a73f19323d71 +3e46519cee5c916a9b39480fbac13f4ffc6a93b0 The first line of this file holds the git revision number of the last merge done from the gofrontend repository. diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go index c0e85773098..e3f934ae7bd 100644 --- a/libgo/go/runtime/proc.go +++ b/libgo/go/runtime/proc.go @@ -563,6 +563,14 @@ func schedinit() { sched.lastpoll = uint64(nanotime()) procs := ncpu + + // In 32-bit mode, we can burn a lot of memory on thread stacks. + // Try to avoid this by limiting the number of threads we run + // by default. + if sys.PtrSize == 4 && procs > 32 { + procs = 32 + } + if n, ok := atoi32(gogetenv("GOMAXPROCS")); ok && n > 0 { procs = n }
libgo patch committed: Update to 1.14rc1
I've committed a patch to update libgo to the 1.14rc1 release (this is a release candidate for the 1.14 Go release). Bootstrapped and ran Go testsuite on x86_64-pc-linux-gnu. Committed to mainline. Ian patch.txt.bz2 Description: application/bzip
Re: [RFC PATCH v0] PPC64: Implement POWER Architecture Vector Function ABI.
‐‐‐ Original Message ‐‐‐ On Friday, February 14, 2020 6:46 PM, Segher Boessenkool wrote: > On Fri, Feb 14, 2020 at 08:24:30PM +, GT wrote: > > > Function rs6000_simd_clone_adjust, even though its body is empty, > > cannot simply be removed. I tried it. It resulted in an ICE. In my > > view, leaving it empty is preferable to modifying other files > > unrelated to rs6000.c in order to avoid having a function whose > > body is empty. > > Please Cc: the rs6000 maintainers on rs6000 patches, you will get a > reply faster, and more reliably. > File MAINTAINERS has you, David Edelsohn and Aldy Hernandez listed as maintainers of various rs6000 aspects. Is that who you say I should "Cc:" or is there a separate mailing list for rs6000? > Please don't use binary attachments, it takes effort to reply to those. > I have not been able to configure protonmail for either git imap-send or send-email. Will try pasting the .patch inline as plain text and see if that works. Bert.
Re: [PATCH] libstdc++: Move code after an early exit constexpr if to under an else branch
On 15/02/20 11:28 -0500, Patrick Palka wrote: This avoids instantiating dead code when the true branch of the constexpr if is taken. [ diffstat generated with -w to ignore noisy whitespace changes ] libstdc++-v3/ChangeLog: * include/bits/ranges_algo.h (__lexicographical_compare_fn::operator()): Move code after an early exit constexpr if to under an else branch. * include/bits/ranges_algobase.h (__equal_fn::operator()): Likewise. OK for master, thanks!
[committed] c++: Add -std=c++20.
It's probably past time for this, but definitely now that we're done with the final committee meeting of C++20. This patch only adds the option and adjusts the testsuite to recognize it; more extensive changes can wait for the published standard. Tested x86_64-pc-linux-gnu, applying to trunk. gcc/ChangeLog 2020-02-15 Jason Merrill * doc/invoke.texi (C Dialect Options): Add -std=c++20. gcc/c-family/ChangeLog 2020-02-15 Jason Merrill * c.opt: Add -std=c++20. gcc/testsuite/ChangeLog 2020-02-15 Jason Merrill * lib/target-supports.exp (check_effective_target_c++2a_only): Also look for -std=*++20. (check_effective_target_concepts): Use check_effective_target_c++2a. --- gcc/doc/invoke.texi | 10 ++ gcc/c-family/c.opt| 6 +- gcc/testsuite/lib/target-supports.exp | 9 ++--- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 597151670be..3e47d06f0d5 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -2078,13 +2078,15 @@ The name @samp{c++1z} is deprecated. GNU dialect of @option{-std=c++17}. The name @samp{gnu++1z} is deprecated. -@item c++2a -The next revision of the ISO C++ standard, tentatively planned for +@item c++20 +@itemx c++2a +The next revision of the ISO C++ standard, planned for 2020. Support is highly experimental, and will almost certainly change in incompatible ways in future releases. -@item gnu++2a -GNU dialect of @option{-std=c++2a}. Support is highly experimental, +@item gnu++20 +@itemx gnu++2a +GNU dialect of @option{-std=c++20}. Support is highly experimental, and will almost certainly change in incompatible ways in future releases. @end table diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt index 814ed17f7c4..b7e4fe146b2 100644 --- a/gcc/c-family/c.opt +++ b/gcc/c-family/c.opt @@ -2071,7 +2071,11 @@ Conform to the ISO 2017 C++ standard. std=c++2a C++ ObjC++ -Conform to the ISO 2020(?) C++ draft standard (experimental and incomplete support). 
+Conform to the ISO 2020 C++ draft standard (experimental and incomplete support). + +std=c++20 +C++ ObjC++ Alias(std=c++2a) +Conform to the ISO 2020 C++ draft standard (experimental and incomplete support). std=c11 C ObjC diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index fb177c54aa8..ec462315860 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -8806,7 +8806,7 @@ proc check_effective_target_c++2a_only { } { if ![check_effective_target_c++] { return 0 } -if [check-flags { { } { } { -std=c++2a -std=gnu++2a } }] { +if [check-flags { { } { } { -std=c++2a -std=gnu++2a -std=c++20 -std=gnu++20 } }] { return 1 } if { $cxx_default == "c++20" && [check-flags { { } { } { } { -std=* } }] } { @@ -8818,9 +8818,12 @@ proc check_effective_target_c++2a { } { return [check_effective_target_c++2a_only] } -# Check for C++ Concepts TS support, i.e. -fconcepts flag. +# Check for C++ Concepts support, i.e. -fconcepts flag. proc check_effective_target_concepts { } { -return [check-flags { "" { } { -fconcepts -std=*2a } }] +if [check_effective_target_c++2a] { + return 1 +} +return [check-flags { "" { } { -fconcepts } }] } # Return 1 if expensive testcases should be run. base-commit: 0b3c2eed35d608d6541ecf004a9576b4eae0b4ef -- 2.18.1
libbacktrace patch committed: Update test file
This libbacktrace patch updates the test file used for comparisons with zlib. The file that the test was previously using, from libgo, no longer exists. Use its replacement file instead. Bootstrapped and ran libbacktrace tests on x86_64-pc-linux-gnu. Committed to mainline. Ian 2020-02-15 Ian Lance Taylor * ztest.c (test_large): Update file to current libgo test file. diff --git a/libbacktrace/ztest.c b/libbacktrace/ztest.c index 40f9c389a2a..2663c90061a 100644 --- a/libbacktrace/ztest.c +++ b/libbacktrace/ztest.c @@ -315,8 +315,8 @@ test_large (struct backtrace_state *state) size_t ctimes[16]; size_t ztimes[16]; static const char * const names[] = { -"Mark.Twain-Tom.Sawyer.txt", -"../libgo/go/compress/testdata/Mark.Twain-Tom.Sawyer.txt" +"Isaac.Newton-Opticks.txt", +"../libgo/go/testdata/Isaac.Newton-Opticks.txt", }; orig_buf = NULL;