[committed] libstdc++: Resolve -Wsign-compare issue

2023-05-26 Thread Matthias Kretz via Gcc-patches
pushed to master, will backport later

tested on x86_64-pc-linux-gnu and powerpc64le-linux-gnu

--- 8< 


Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_ppc.h (_S_bit_shift_left):
Negative __y is UB, so prefer signed compare.
---
 libstdc++-v3/include/experimental/bits/simd_ppc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd_ppc.h b/libstdc++-v3/include/experimental/bits/simd_ppc.h
index 2ea7234bd99..34daa530f02 100644
--- a/libstdc++-v3/include/experimental/bits/simd_ppc.h
+++ b/libstdc++-v3/include/experimental/bits/simd_ppc.h
@@ -64,7 +64,7 @@ _S_bit_shift_left(_SimdWrapper<_Tp, _Np> __x, int __y)
 	__x = _Base::_S_bit_shift_left(__x, __y);
 	if constexpr (sizeof(_Tp) < sizeof(int))
 	  {
-	if (__y >= sizeof(_Tp) * __CHAR_BIT__)
+	if (__y >= int(sizeof(_Tp) * __CHAR_BIT__))
 	  return {};
 	  }
 	return __x;


[PATCH] libstdc++: Fix test assumptions on long and long double

2023-05-26 Thread Matthias Kretz via Gcc-patches
OK for master and all backports (after 11.4 is done)

tested on powerpc64le-linux-gnu (with 64-bit long double)

--- 8< 

Expect that long might not fit into the long double mantissa bits.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* testsuite/experimental/simd/tests/operator_cvt.cc: Make long
double <-> (u)long conversion tests conditional on sizeof(long
double) and sizeof(long).
---
 .../experimental/simd/tests/operator_cvt.cc   | 30 +++
 1 file changed, 24 insertions(+), 6 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/testsuite/experimental/simd/tests/operator_cvt.cc b/libstdc++-v3/testsuite/experimental/simd/tests/operator_cvt.cc
index c1acfdf10e0..9b6cb38ff82 100644
--- a/libstdc++-v3/testsuite/experimental/simd/tests/operator_cvt.cc
+++ b/libstdc++-v3/testsuite/experimental/simd/tests/operator_cvt.cc
@@ -219,8 +219,6 @@ namespace simd_abi
 	binary_op_return_type();
 	binary_op_return_type();
 	binary_op_return_type();
-	binary_op_return_type();
-	binary_op_return_type();
 	binary_op_return_type();
 	binary_op_return_type();
 
@@ -230,8 +228,6 @@ namespace simd_abi
 	binary_op_return_type, ushort>();
 	binary_op_return_type, int>();
 	binary_op_return_type, uint>();
-	binary_op_return_type, long>();
-	binary_op_return_type, ulong>();
 	binary_op_return_type, float>();
 	binary_op_return_type, double>();
 	binary_op_return_type, vf64>();
@@ -244,8 +240,6 @@ namespace simd_abi
 	binary_op_return_type, ushort>();
 	binary_op_return_type, int>();
 	binary_op_return_type, uint>();
-	binary_op_return_type, long>();
-	binary_op_return_type, ulong>();
 	binary_op_return_type, float>();
 	binary_op_return_type, double>();
 
@@ -257,6 +251,24 @@ namespace simd_abi
 	VERIFY((is_substitution_failure, ullong>));
 	VERIFY((is_substitution_failure, llong>));
 	VERIFY((is_substitution_failure, ullong>));
+	if constexpr (sizeof(long) == sizeof(llong))
+	  {
+		VERIFY((is_substitution_failure));
+		VERIFY((is_substitution_failure));
+		VERIFY((is_substitution_failure, long>));
+		VERIFY((is_substitution_failure, ulong>));
+		VERIFY((is_substitution_failure, long>));
+		VERIFY((is_substitution_failure, ulong>));
+	  }
+	else
+	  {
+		binary_op_return_type();
+		binary_op_return_type();
+		binary_op_return_type, long>();
+		binary_op_return_type, ulong>();
+		binary_op_return_type, long>();
+		binary_op_return_type, ulong>();
+	  }
 	  }
 	else
 	  {
@@ -266,6 +278,12 @@ namespace simd_abi
 	binary_op_return_type, ullong>();
 	binary_op_return_type, llong>();
 	binary_op_return_type, ullong>();
+	binary_op_return_type();
+	binary_op_return_type();
+	binary_op_return_type, long>();
+	binary_op_return_type, ulong>();
+	binary_op_return_type, long>();
+	binary_op_return_type, ulong>();
 	  }
 
 	VERIFY((is_substitution_failure, vldouble>));


[PATCH] libstdc++: Simplify calculation of expected value in simd test

2023-05-26 Thread Matthias Kretz via Gcc-patches
OK for master and all backports (after 11.4 is done)?

tested on powerpc64le-linux-gnu

--- 8< 

This avoids a failure on PR109964.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* testsuite/experimental/simd/tests/integer_operators.cc:
Compute expected value differently to avoid getting turned into
a vector shift.
---
 .../experimental/simd/tests/integer_operators.cc | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/testsuite/experimental/simd/tests/integer_operators.cc b/libstdc++-v3/testsuite/experimental/simd/tests/integer_operators.cc
index 7a2bc085e49..08ad49ca710 100644
--- a/libstdc++-v3/testsuite/experimental/simd/tests/integer_operators.cc
+++ b/libstdc++-v3/testsuite/experimental/simd/tests/integer_operators.cc
@@ -179,11 +179,10 @@ for (int s = 1; s < nbits; ++s)
 	  for (int j = 0; j < 100; ++j)
 	{
 	  const V seq([&](auto i) -> T { return (j + i) % n_promo_bits; });
-	  COMPARE(V(1) >> seq, V([&](auto i) { return T(T(1) >> seq[i]); }))
-		<< "seq = " << seq;
-	  COMPARE(make_value_unknown(V(1)) >> make_value_unknown(seq),
-		V([&](auto i) { return T(T(1) >> seq[i]); }))
-		<< "seq = " << seq;
+	  const V expect([&](auto i) { return seq[i] == 0 ? T(1) : T(0); });
+	  COMPARE(V(1) >> seq, expect) << "\nseq = " << seq;
+	  COMPARE(make_value_unknown(V(1)) >> make_value_unknown(seq), expect)
+		<< "\nseq = " << seq;
 	}
 	  for_constexpr([](auto shift_ic) {
 	constexpr int shift = shift_ic;


[PATCH] libstdc++: Correct NTTP and simd_mask ctor call

2023-05-26 Thread Matthias Kretz via Gcc-patches
OK for master and all backports (after 11.4 is done)?

tested on powerpc64le-linux-gnu and x86_64-pc-linux-gnu

--- 8< 

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/109822
* include/experimental/bits/simd.h (to_native): Use int NTTP
as specified in PTS2.
(to_compatible): Likewise. Add missing tag to call mask
generator ctor.
* testsuite/experimental/simd/pr109822_cast_functions.cc: New
test.
---
 libstdc++-v3/include/experimental/bits/simd.h |  7 ++-
 .../simd/pr109822_cast_functions.cc   | 63 +++
 2 files changed, 67 insertions(+), 3 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/experimental/simd/
pr109822_cast_functions.cc


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index 26f08f83ab0..f94b8361ab0 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -3304,7 +3304,7 @@ to_native(const fixed_size_simd<_Tp, _Np>& __x)
 return {__mem, vector_aligned};
   }
 
-template 
+template 
   _GLIBCXX_SIMD_INTRINSIC
   enable_if_t<(_Np == native_simd_mask<_Tp>::size()), native_simd_mask<_Tp>>
   to_native(const fixed_size_simd_mask<_Tp, _Np>& __x)
@@ -3315,7 +3315,7 @@ to_native(const fixed_size_simd_mask<_Tp, _Np>& __x)
   }
 
 // to_compatible {{{2
-template 
+template 
   _GLIBCXX_SIMD_INTRINSIC enable_if_t<(_Np == simd<_Tp>::size()), simd<_Tp>>
   to_compatible(const simd<_Tp, simd_abi::fixed_size<_Np>>& __x)
   {
@@ -3324,12 +3324,13 @@ to_compatible(const simd<_Tp, simd_abi::fixed_size<_Np>>& __x)
 return {__mem, vector_aligned};
   }
 
-template 
+template 
   _GLIBCXX_SIMD_INTRINSIC
   enable_if_t<(_Np == simd_mask<_Tp>::size()), simd_mask<_Tp>>
   to_compatible(const simd_mask<_Tp, simd_abi::fixed_size<_Np>>& __x)
   {
 return simd_mask<_Tp>(
+	 __private_init,
 	 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
   }
 
diff --git a/libstdc++-v3/testsuite/experimental/simd/pr109822_cast_functions.cc b/libstdc++-v3/testsuite/experimental/simd/pr109822_cast_functions.cc
new file mode 100644
index 000..3deafbf7a1f
--- /dev/null
+++ b/libstdc++-v3/testsuite/experimental/simd/pr109822_cast_functions.cc
@@ -0,0 +1,63 @@
+// { dg-options "-std=gnu++17" }
+// { dg-do compile { target c++17 } }
+
+#include 
+
+namespace stdx = std::experimental;
+
+template 
+  void
+  test01()
+  {
+using M = typename V::mask_type;
+[[maybe_unused]] auto x = to_fixed_size(V());
+[[maybe_unused]] auto k = to_fixed_size(M());
+if constexpr (stdx::simd::size() == V::size())
+  {
+	[[maybe_unused]] auto xx = to_compatible(x);
+	[[maybe_unused]] auto kk = to_compatible(k);
+	x = to_fixed_size(xx);
+	k = to_fixed_size(kk);
+  }
+if constexpr (stdx::native_simd::size() == V::size())
+  {
+	[[maybe_unused]] auto xx = to_native(x);
+	[[maybe_unused]] auto kk = to_native(k);
+	x = to_fixed_size(xx);
+	k = to_fixed_size(kk);
+  }
+  }
+
+template 
+  void
+  iterate_abis()
+  {
+test01>();
+test01>();
+test01>();
+test01>();
+test01 - 4>>();
+  }
+
+int
+main()
+{
+  iterate_abis();
+  iterate_abis();
+  iterate_abis();
+  iterate_abis();
+
+  iterate_abis();
+  iterate_abis();
+  iterate_abis();
+  iterate_abis();
+  iterate_abis();
+  iterate_abis();
+  iterate_abis();
+  iterate_abis();
+  iterate_abis();
+  iterate_abis();
+  iterate_abis();
+  iterate_abis();
+  iterate_abis();
+}


[committed] libstdc++: Fix condition for supported SIMD types on ARMv8

2023-06-01 Thread Matthias Kretz via Gcc-patches
pushed to trunk, will backport

tested on arm-linux-gnueabihf

-- 8< --

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/110050
* include/experimental/bits/simd.h (__vectorized_sizeof): With
__have_neon_a32 only single-precision float works (in addition
to integers).
---
 libstdc++-v3/include/experimental/bits/simd.h | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──
diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index f94b8361ab0..834fe923065 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -2808,8 +2808,10 @@ __vectorized_sizeof()
 	  return 16;
 
 	// ARM:
-	if constexpr (__have_neon_a64
-		  || (__have_neon_a32 && !is_same_v<_Tp, double>) )
+	if constexpr (__have_neon_a64)
+	  return 16;
+	if constexpr (__have_neon_a32 and (not is_floating_point_v<_Tp>
+	 or is_same_v<_Tp, float>))
 	  return 16;
 	if constexpr (__have_neon
 		  && sizeof(_Tp) < 8


Re: [PATCH] doc: clarify semantics of vector bitwise shifts

2023-06-02 Thread Matthias Kretz via Gcc-patches
On Thursday, 1 June 2023 20:25:14 CEST Alexander Monakov wrote:
> On Wed, 31 May 2023, Richard Biener wrote:
> > So yes, we probably should clarify the semantics to match the
> > implementation (since we have two targets doing things differently
> > since forever we can only document it as UB) and also note the
> > difference from OpenCL (in case OpenCL is still relevant these
> > days we might want to offer a -fopencl-vectors to emit the required
> > AND).
> 
> It doesn't have to be UB, in principle we could say that shift amount
> is taken modulo some power of two depending on the target without UB.
> But since LLVM already treats that as UB, we might as well follow.

I prefer UB (as your patch states 👍). If a user requires the AND, let them 
state it explicitly. Don't let everybody pay in performance.

> I think for addition/multiplication of signed vectors everybody
> expects them to have wrapping semantics without UB on overflow though?

  simd<int> x = ...;
  bool t = all_of(x < x + 1); // unconditionally true or not?

I'd expect t to be unconditionally true, because simd<int> simply is a
data-parallel version of int.

> Revised patch below.

This can be considered a breaking change. Does it need a mention in the 
release notes?

- Matthias


-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──


Re: [PATCH] libstdc++: Correct NTTP and simd_mask ctor call

2023-06-02 Thread Matthias Kretz via Gcc-patches
Hello Alexandre,

On Friday, 2 June 2023 10:32:40 CEST Alexandre Oliva wrote:
> On May 26, 2023, Matthias Kretz via Libstdc++  wrote:
> > OK for master and all backports (after 11.4 is done)?
> > tested on powerpc64le-linux-gnu and x86_64-pc-linux-gnu
> > 
> > * testsuite/experimental/simd/pr109822_cast_functions.cc: New
> > test.
> 
> This testcase fails to compile on PowerPC targets without VSX: 64-bit
> integer and floating-point types cannot be vectorized.

Yes, and the simd implementation already encodes that both in 
__vectorized_sizeof() and __intrinsic_type.

> I wonder if the test is malformed (and should be amended to test for
> available simd types), or whether a patch like this would be desirable
> to make simd constructs more portable.  I'm not sure about the
> requirements.

The test is correct. The stdx::simd implementation has a latent bug (my 
dejagnu boards included only POWER7-POWER9; I'm at POWER5-POWER10 by now). The 
_S_store function is trying to work around bad code-gen but fails to notice 
that long long vectors can't be used.

I'm looking at that function again, also in light of recent code-gen
improvements, and will remove the assumption that long long is vectorizable.

__intrinsic_type_t<T, N> should never be T, but always the type that can be
passed to the corresponding platform intrinsics. There are traits for the
implementation to detect whether the intrinsic types are available.

- Matthias

> 
> 
> [libstdc++] [simd] [ppc] use nonvector intrinsic fallback types
> 
> From: Alexandre Oliva 
> 
> Compiling such tests as pr109822_cast_functions.cc on powerpc targets
> that don't support VSX fails because some intrinsic types that are
> expected to be vectorizable are not defined without VSX.
> 
> Introduce fallback non-vector types to enable the code to compile.
> 
> 
> for  libstdc++-v3/ChangeLog
> 
>   * include/experimental/bits/simd.h: Introduce fallback
>   non-vector intrinsic_type_impl specializations for PowerPC
>   without VSX.
> ---
>  libstdc++-v3/include/experimental/bits/simd.h |   12 
>  1 file changed, 12 insertions(+)
> 
> diff --git a/libstdc++-v3/include/experimental/bits/simd.h
> b/libstdc++-v3/include/experimental/bits/simd.h index
> 834fe923065bd..2691823e869e8 100644
> --- a/libstdc++-v3/include/experimental/bits/simd.h
> +++ b/libstdc++-v3/include/experimental/bits/simd.h
> @@ -2431,9 +2431,14 @@ template 
>  #define _GLIBCXX_SIMD_PPC_INTRIN(_Tp)  
>\ template <>   
>   \ struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; }
> +#define _GLIBCXX_SIMD_PPC_INTRIN_NOVEC(_Tp)  
   \
> +  template <>  
>\ +struct __intrinsic_type_impl<_Tp> { using type = _Tp; }
>  _GLIBCXX_SIMD_PPC_INTRIN(float);
>  #ifdef __VSX__
>  _GLIBCXX_SIMD_PPC_INTRIN(double);
> +#else
> +_GLIBCXX_SIMD_PPC_INTRIN_NOVEC(double);
>  #endif
>  _GLIBCXX_SIMD_PPC_INTRIN(signed char);
>  _GLIBCXX_SIMD_PPC_INTRIN(unsigned char);
> @@ -2444,12 +2449,19 @@ _GLIBCXX_SIMD_PPC_INTRIN(unsigned int);
>  #if defined __VSX__ || __SIZEOF_LONG__ == 4
>  _GLIBCXX_SIMD_PPC_INTRIN(signed long);
>  _GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
> +#else
> +_GLIBCXX_SIMD_PPC_INTRIN_NOVEC(signed long);
> +_GLIBCXX_SIMD_PPC_INTRIN_NOVEC(unsigned long);
>  #endif
>  #ifdef __VSX__
>  _GLIBCXX_SIMD_PPC_INTRIN(signed long long);
>  _GLIBCXX_SIMD_PPC_INTRIN(unsigned long long);
> +#else
> +_GLIBCXX_SIMD_PPC_INTRIN_NOVEC(signed long long);
> +_GLIBCXX_SIMD_PPC_INTRIN_NOVEC(unsigned long long);
>  #endif
>  #undef _GLIBCXX_SIMD_PPC_INTRIN
> +#undef _GLIBCXX_SIMD_PPC_INTRIN_NOVEC
> 
>  template 
>struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp>
> && _Bytes <= 16>>


-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──





Re: [PATCH] doc: clarify semantics of vector bitwise shifts

2023-06-02 Thread Matthias Kretz via Gcc-patches
On Friday, 2 June 2023 09:49:26 CEST Alexander Monakov wrote:
> > simd x = ...;
> > bool t = all_of(x < x + 1); // unconditionally true or not?
> > 
> > I'd expect t to be unconditionally true. Because simd simply is a
> > data- parallel version of int.
> 
> Okay, I see opinions will vary here. I was thinking about our immintrin.h
> which is partially implemented in terms of generic vectors. Imagine we
> extend UBSan to trap on signed overflow for vector types. I expect that
> will blow up on existing code that uses Intel intrinsics.

_mm_add_epi32 is already implemented via __v4su addition (i.e. unsigned). So 
the intrinsic would continue to wrap on signed overflow.

> > > Revised patch below.
> > 
> > This can be considered a breaking change. Does it need a mention in the
> > release notes?
> 
> I'm not sure what you consider a breaking change here. Is that the implied
> threat to use undefinedness for range deduction and other optimizations?

Consider the stdx::simd implementation. It currently follows the semantics of
the builtin types, so a simd with an element type narrower than int (e.g.
simd<char>) can be shifted by 30 without UB. The implementation of the shift
operator depends on the current behavior, even if it is target-dependent. For
PPC the simd implementation adds extra code to avoid the "UB". With shift
counts >= sizeof(T) * CHAR_BIT nailed down as UB, this extra code now needs to
be added for all targets.

- Matthias

-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──


Re: [PATCH] doc: clarify semantics of vector bitwise shifts

2023-06-02 Thread Matthias Kretz via Gcc-patches
On Friday, 2 June 2023 11:24:23 CEST Alexander Monakov wrote:
> > > I'm not sure what you consider a breaking change here. Is that the
> > > implied
> > > threat to use undefinedness for range deduction and other optimizations?
> > 
> > Consider the stdx::simd implementation. It currently follows semantics of
> > the builtin types. So simd can be shifted by 30 without UB. The
> > implementation of the shift operator depends on the current behavior, even
> > if it is target-dependent. For PPC the simd implementation adds extra
> > code to avoid the "UB". With nailing down shifts > sizeof(T) as UB this
> > extra code now needs to be added for all targets.
> 
> What does stdx::simd do on LLVM, where that has always been UB even on x86?

At this point Clang/LLVM support is best effort. I did not know before that 
LLVM nailed this down as UB. Also my test suite didn't show any failures on 
shifts IIRC (but that doesn't say anything about UB, I know).

FWIW, I'm okay with saying nothing in the release notes. It might just be that
some code has become dependent on the existing (under-specified) behavior. 🤷

- Matthias
-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──


Re: [PATCH] libstdc++: Correct NTTP and simd_mask ctor call

2023-06-02 Thread Matthias Kretz via Gcc-patches
On Friday, 2 June 2023 11:30:17 CEST Alexandre Oliva wrote:
> I also noticed the same test is failing on rtems6 (at least with gcc
> 11).  AFAICT the problem is that _GLIBCXX_SIMD_MATH_CALL* macros in
> simd_math.h expect the named functions to be in std::, but I get such
> errors as:
> 
> .../aarch64-rtems6/include/c++/11.4.1/experimental/bits/simd_math.h:1299:
> error: 'remainder' is not a member of 'std'
> .../aarch64-rtems6/include/c++/11.4.1/experimental/bits/simd_math.h:1299:
> note: suggested alternatives: [...]
> .../aarch64-rtems6/include/math.h:346: note:   'remainder'
> [...]
> .../aarch64-rtems6/include/c++/11.4.1/experimental/bits/simd_math.h:1299:
> note:   'std::experimental::parallelism_v2::remainder'
> .../aarch64-rtems6/include/c++/11.4.1/experimental/bits/simd_math.h:1299:
> error: template argument 1 is invalid [...]
> .../aarch64-rtems6/include/c++/11.4.1/experimental/bits/simd_math.h:1328:
> error: 'fmin' is not a member of 'std'; did you mean 'min'?
> .../aarch64-rtems6/include/c++/11.4.1/experimental/bits/simd_math.h:1328:
> error: 'fmin' is not a member of 'std'; did you mean 'min'?
> .../aarch64-rtems6/include/c++/11.4.1/experimental/bits/simd_math.h:1328:
> error: template argument 1 is invalid
> 
> ISTM that rtems is missing some of the math.h functions expected by
> libstdc++, but also that even those that are present are not visible in
> namespace ::std::, where the macros reasonably expect to find them.  Is
> this known?  Should I file a PR about it?

I had/have no idea. Is rtems6 using the "freestanding" subset of C++? If so,
simd shouldn't be there at all. Otherwise <cmath> should work, no?

- Matthias

-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──


[committed] libstdc++: Protect against macros

2023-06-06 Thread Matthias Kretz via Gcc-patches
pushed to trunk, will backport later

tested on x86_64-pc-linux-gnu

--- >8 

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd.h (__bit_cast): Use
__gnu__::__vector_size__ instead of gnu::vector_size.
---
 libstdc++-v3/include/experimental/bits/simd.h | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──
diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index 834fe923065..90523ea57dc 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -1743,18 +1743,18 @@ __bit_cast(const _From __x)
   return reinterpret_cast<_To>(__x);
 else if constexpr (__is_vector_type_v<_To> && __from_is_vectorizable)
   {
-	using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
+	using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
 	return reinterpret_cast<_To>(_FV{__x});
   }
 else if constexpr (__to_is_vectorizable && __from_is_vectorizable)
   {
-	using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
-	using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
+	using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
+	using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
 	return reinterpret_cast<_TV>(_FV{__x})[0];
   }
 else if constexpr (__to_is_vectorizable && __is_vector_type_v<_From>)
   {
-	using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
+	using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
 	return reinterpret_cast<_TV>(__x)[0];
   }
 else


[PATCH] libstdc++: Replace use of incorrect non-temporal store

2023-06-06 Thread Matthias Kretz via Gcc-patches
OK for master and backports?

Tested on x86_64-pc-linux-gnu

- >8 -

The call to the base implementation sometimes didn't find a matching
signature because the _Abi parameter of _SimdImpl* was "wrong" after
conversion. It has to call into the deduced ABI tag's _SimdImpl
(_UAbi::_SimdImpl) instead of the current ABI tag's _SimdImpl. This also
reduces the number of possible template instantiations.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/110054
* include/experimental/bits/simd_builtin.h (_S_masked_store):
Call into deduced ABI's SimdImpl after conversion.
* include/experimental/bits/simd_x86.h (_S_masked_store_nocvt):
Don't use _mm_maskmoveu_si128. Use the generic fall-back
implementation. Also fix masked stores without SSE2, which
were not doing anything before.
---
 .../include/experimental/bits/simd_builtin.h  |  6 +--
 .../include/experimental/bits/simd_x86.h  | 38 ++-
 2 files changed, 7 insertions(+), 37 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index 8337fa2d9a6..64ef6efaf8c 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -1628,7 +1628,7 @@ _S_masked_store_nocvt(_SimdWrapper<_Tp, _Np> __v, _Tp* __mem, _MaskMember<_Tp> _
 	if constexpr (_UW_size == _TV_size) // one convert+store
 	  {
 		const _UW __converted = __convert<_UW>(__v);
-		_SuperImpl::_S_masked_store_nocvt(
+		_UAbi::_SimdImpl::_S_masked_store_nocvt(
 		  __converted, __mem,
 		  _UAbi::_MaskImpl::template _S_convert<
 		__int_for_sizeof_t<_Up>>(__k));
@@ -1643,7 +1643,7 @@ _S_masked_store_nocvt(_SimdWrapper<_Tp, _Np> __v, _Tp* __mem, _MaskMember<_Tp> _
 		const array<_UV, _NAllStores> __converted
 		  = __convert_all<_UV, _NAllStores>(__v);
 		__execute_n_times<_NFullStores>([&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
-		  _SuperImpl::_S_masked_store_nocvt(
+		  _UAbi::_SimdImpl::_S_masked_store_nocvt(
 		_UW(__converted[__i]), __mem + __i * _UW_size,
 		_UAbi::_MaskImpl::template _S_convert<
 		  __int_for_sizeof_t<_Up>>(
@@ -1651,7 +1651,7 @@ _S_masked_store_nocvt(_SimdWrapper<_Tp, _Np> __v, _Tp* __mem, _MaskMember<_Tp> _
 		});
 		if constexpr (_NAllStores
 			  > _NFullStores) // one partial at the end
-		  _SuperImpl::_S_masked_store_nocvt(
+		  _UAbi::_SimdImpl::_S_masked_store_nocvt(
 		_UW(__converted[_NFullStores]),
 		__mem + _NFullStores * _UW_size,
 		_UAbi::_MaskImpl::template _S_convert<
diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 77d2f84ab71..2e301e45677 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -1106,31 +1106,6 @@ _S_masked_store_nocvt(_SimdWrapper<_Tp, _Np> __v, _Tp* __mem, _SimdWrapper= 4 or sizeof(_vi) <= 32
-  // with Skylake-AVX512, __have_avx512bw is true
-	  else if constexpr (__have_sse2)
-	{
-	  using _M   = __vector_type_t<_Tp, _Np>;
-	  using _MVT = _VectorTraits<_M>;
-	  _mm_maskmoveu_si128(__auto_bitcast(__extract<0, 4>(__v._M_data)),
-  __auto_bitcast(_MaskImpl::template _S_convert<_Tp, _Np>(__k._M_data)),
-  reinterpret_cast(__mem));
-	  _mm_maskmoveu_si128(__auto_bitcast(__extract<1, 4>(__v._M_data)),
-  __auto_bitcast(_MaskImpl::template _S_convert<_Tp, _Np>(
-__k._M_data >> 1 * _MVT::_S_full_size)),
-  reinterpret_cast(__mem) + 1 * 16);
-	  _mm_maskmoveu_si128(__auto_bitcast(__extract<2, 4>(__v._M_data)),
-  __auto_bitcast(_MaskImpl::template _S_convert<_Tp, _Np>(
-__k._M_data >> 2 * _MVT::_S_full_size)),
-  reinterpret_cast(__mem) + 2 * 16);
-	  if constexpr (_Np > 48 / sizeof(_Tp))
-		_mm_maskmoveu_si128(
-		  __auto_bitcast(__extract<3, 4>(__v._M_data)),
-		  __auto_bitcast(_MaskImpl::template _S_convert<_Tp, _Np>(
-		__k._M_data >> 3 * _MVT::_S_full_size)),
-		  reinterpret_cast(__mem) + 3 * 16);
-	}
-#endif
 	else
 	  __assert_unreachable<_Tp>();
 	  }
@@ -1233,8 +1208,8 @@ _S_masked_store_nocvt(_SimdWrapper<_Tp, _Np> __v, _Tp* __mem,
 	else if constexpr (__have_avx && sizeof(_Tp) == 8)
 	  _mm_maskstore_pd(reinterpret_cast(__mem), __ki,
 			   __vector_bitcast(__vi));
-	else if constexpr (__have_sse2)
-	  _mm_maskmoveu_si128(__vi, __ki, reinterpret_cast(__mem));
+	else
+	  _Base::_S_masked_store_nocvt(__v, __mem, __k);
 	  }
 	else if constexpr (sizeof(__v) == 32)
 	  {
@@ -1259,13 +1234,8 @@ _S_masked_store_nocvt(_SimdWrapper<_Tp,

[PATCH] libstdc++: Avoid vector casts while still avoiding PR90424

2023-06-06 Thread Matthias Kretz via Gcc-patches
This is the first part of a fix for the failure of the new simd test on
non-VSX POWER targets. There are more casts to unavailable vectors of 64-bit
element types to be rewritten.

OK for master and backports?

Tested on x86_64-pc-linux-gnu and powerpc64le-linux-gnu

-- >8 ---

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/109822
* include/experimental/bits/simd_builtin.h (_S_store): Rewrite
to avoid casts to other vector types. Implement store as
succession of power-of-2 sized memcpy to avoid PR90424.
---
 .../include/experimental/bits/simd_builtin.h  | 40 +++
 1 file changed, 15 insertions(+), 25 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index 64ef6efaf8c..6ccc2fcec9c 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -1295,6 +1295,18 @@ _S_load(const void* __p)
 
   // }}}
   // _S_store {{{
+  template 
+_GLIBCXX_SIMD_INTRINSIC static void
+_S_memcpy(char* __dst, const char* __src)
+{
+  if constexpr (_Bytes > 0)
+	{
+	  constexpr size_t _Ns = std::__bit_floor(_Bytes);
+	  __builtin_memcpy(__dst, __src, _Ns);
+	  _S_memcpy<_Bytes - _Ns>(__dst + _Ns, __src + _Ns);
+	}
+}
+
   template 
 _GLIBCXX_SIMD_INTRINSIC static void
 _S_store(_TV __x, void* __addr)
@@ -1302,33 +1314,11 @@ _S_store(_TV __x, void* __addr)
   constexpr size_t _Bytes = _ReqBytes == 0 ? sizeof(__x) : _ReqBytes;
   static_assert(sizeof(__x) >= _Bytes);
 
+#if !defined __clang__ && _GLIBCXX_SIMD_WORKAROUND_PR90424
   if constexpr (__is_vector_type_v<_TV>)
-	{
-	  using _Tp = typename _VectorTraits<_TV>::value_type;
-	  constexpr size_t _Np = _Bytes / sizeof(_Tp);
-	  static_assert(_Np * sizeof(_Tp) == _Bytes);
-
-#ifdef _GLIBCXX_SIMD_WORKAROUND_PR90424
-	  using _Up = conditional_t<
-	(is_integral_v<_Tp> || _Bytes < 4),
-	conditional_t<(sizeof(__x) > sizeof(long long)), long long, _Tp>,
-	float>;
-	  const auto __v = __vector_bitcast<_Up>(__x);
-#else // _GLIBCXX_SIMD_WORKAROUND_PR90424
-	  const __vector_type_t<_Tp, _Np> __v = __x;
-#endif // _GLIBCXX_SIMD_WORKAROUND_PR90424
-
-	  if constexpr ((_Bytes & (_Bytes - 1)) != 0)
-	{
-	  constexpr size_t _MoreBytes = std::__bit_ceil(_Bytes);
-	  alignas(decltype(__v)) char __tmp[_MoreBytes];
-	  __builtin_memcpy(__tmp, &__v, _MoreBytes);
-	  __builtin_memcpy(__addr, __tmp, _Bytes);
-	}
-	  else
-	__builtin_memcpy(__addr, &__v, _Bytes);
-	}
+	_S_memcpy<_Bytes>(reinterpret_cast(__addr), reinterpret_cast(&__x));
   else
+#endif // _GLIBCXX_SIMD_WORKAROUND_PR90424
 	__builtin_memcpy(__addr, &__x, _Bytes);
 }
 


[PATCH] libstdc++: Rewrite or avoid casts to 64-bit element types

2023-06-06 Thread Matthias Kretz via Gcc-patches
Last part to resolve test failure introduced with PR109822.

OK for master and backports (gcc-11 doesn't have __builtin_shufflevector, 
though)?

Tested on x86_64-pc-linux-gnu and powerpc64le-linux-gnu

- >8 -

Fix SFINAE on __is_intrinsic_type for 64-bit element types on non-VSX
POWER targets.

Replace __extract_part implementation (which was the only user of
__shift_elements_right) by relying on __builtin_shufflevector (not
available in GCC 11). This removes another cast to 64-bit element type,
which breaks on non-VSX POWER.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/109822
* include/experimental/bits/simd.h: Include <algorithm>
for min(initializer_list).
Define __intrinsic_type_impl for all vectorizable types, but
without type member, if the target doesn't support it.
* include/experimental/bits/simd_builtin.h (__idx_permute): New.
(__shift_elements_right): Removed.
(__extract_part): Rewrite using __idx_permute.
(_S_reduce): Avoid unconditional cast to 64-bit element type
when the element type is known to be smaller.
---
 libstdc++-v3/include/experimental/bits/simd.h |  34 ++-
 .../include/experimental/bits/simd_builtin.h  | 250 +-
 2 files changed, 96 insertions(+), 188 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index 90523ea57dc..effbc60ae46 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -29,6 +29,7 @@
 
 #include "simd_detail.h"
 #include "numeric_traits.h"
+#include 
 #include 
 #include 
 #ifdef _GLIBCXX_DEBUG_UB
@@ -2431,25 +2432,38 @@ struct __intrinsic_type_impl
 #define _GLIBCXX_SIMD_PPC_INTRIN(_Tp)  \
   template <>  \
 struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; }
-_GLIBCXX_SIMD_PPC_INTRIN(float);
 #ifdef __VSX__
-_GLIBCXX_SIMD_PPC_INTRIN(double);
+#define _GLIBCXX_SIMD_PPC_INTRIN_VSX(_Tp) _GLIBCXX_SIMD_PPC_INTRIN(_Tp)
+#else
+#define _GLIBCXX_SIMD_PPC_INTRIN_VSX(_Tp)  \
+  template <>  \
+struct __intrinsic_type_impl<_Tp>  \
+{}
+#endif
+#if defined __VSX__ || __SIZEOF_LONG__ == 4
+#define _GLIBCXX_SIMD_PPC_INTRIN_VSX_4(_Tp) _GLIBCXX_SIMD_PPC_INTRIN(_Tp)
+#else
+#define _GLIBCXX_SIMD_PPC_INTRIN_VSX_4(_Tp)\
+  template <>  \
+struct __intrinsic_type_impl<_Tp>  \
+{}
 #endif
+
+_GLIBCXX_SIMD_PPC_INTRIN(float);
+_GLIBCXX_SIMD_PPC_INTRIN_VSX(double);
 _GLIBCXX_SIMD_PPC_INTRIN(signed char);
 _GLIBCXX_SIMD_PPC_INTRIN(unsigned char);
 _GLIBCXX_SIMD_PPC_INTRIN(signed short);
 _GLIBCXX_SIMD_PPC_INTRIN(unsigned short);
 _GLIBCXX_SIMD_PPC_INTRIN(signed int);
 _GLIBCXX_SIMD_PPC_INTRIN(unsigned int);
-#if defined __VSX__ || __SIZEOF_LONG__ == 4
-_GLIBCXX_SIMD_PPC_INTRIN(signed long);
-_GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
-#endif
-#ifdef __VSX__
-_GLIBCXX_SIMD_PPC_INTRIN(signed long long);
-_GLIBCXX_SIMD_PPC_INTRIN(unsigned long long);
-#endif
+_GLIBCXX_SIMD_PPC_INTRIN_VSX_4(signed long);
+_GLIBCXX_SIMD_PPC_INTRIN_VSX_4(unsigned long);
+_GLIBCXX_SIMD_PPC_INTRIN_VSX(signed long long);
+_GLIBCXX_SIMD_PPC_INTRIN_VSX(unsigned long long);
 #undef _GLIBCXX_SIMD_PPC_INTRIN
+#undef _GLIBCXX_SIMD_PPC_INTRIN_VSX
+#undef _GLIBCXX_SIMD_PPC_INTRIN_VSX_4
 
 template 
   struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index 6ccc2fcec9c..f131b4ebba8 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -48,6 +48,57 @@
 = __andnot(_S_signmask<_V>, _S_allbits<_V>);
 
 //}}}
+
+constexpr int __idx_permute_dontcare = -1;
+constexpr int __idx_permute_zero = -2;
+
+template 
+  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _NewN>
+  __idx_permute_impl(_SimdWrapper<_Tp, _Np> __x, _IdxPerm __perm,
+		 std::integer_sequence)
+  {
+constexpr int _InputWidth = _Np;
+constexpr int _FullWidth = __x._S_full_size;
+return __builtin_shufflevector(__x._M_data, __vector_

[PATCH 0/7] Work on PR108030 and several simd bugfixes and testsuite improvements

2023-02-15 Thread Matthias Kretz via Gcc-patches
As suggested in PR108030, I used __attribute__ syntax to annotate lambdas
as always_inline. In a few cases the lambda was meant to be a function
boundary and the attribute was omitted.

PR108030 mentions a few more functions as problematic. But ideally these 
should not be inline in some fixed_size_simd cases. This needs further 
verification.

This fix is not simply an optimization. If the user hits this bug then 
using simd makes the code significantly slower than without using simd. 
That defeats the whole purpose of the type.

While doing verification I found a few more issues and implemented the use 
of PCH to speed up the test suite.

Matthias Kretz (7):
  libstdc++: Ensure __builtin_constant_p isn't lost on the way
  libstdc++: Annotate most lambdas with always_inline
  libstdc++: Document timeout and timeout-factor of simd tests
  libstdc++: Use a PCH to speed up check-simd
  libstdc++: printf format string fix in testsuite
  libstdc++: Fix incorrect __builtin_is_constant_evaluated calls
  libstdc++: Fix incorrect function call in -ffast-math optimization

 libstdc++-v3/include/experimental/bits/simd.h | 245 ++--
 .../include/experimental/bits/simd_builtin.h  | 351 ++
 .../experimental/bits/simd_converter.h|  22 +-
 .../include/experimental/bits/simd_detail.h   |   3 +
 .../experimental/bits/simd_fixed_size.h   | 265 ++---
 .../include/experimental/bits/simd_math.h |  56 +--
 .../include/experimental/bits/simd_neon.h |  14 +-
 .../include/experimental/bits/simd_x86.h  | 143 +++
 .../testsuite/experimental/simd/README.md |  10 +-
 .../experimental/simd/generate_makefile.sh|  24 +-
 .../testsuite/experimental/simd/tests/abs.cc  |   4 +-
 .../experimental/simd/tests/algorithms.cc |   3 +-
 .../simd/tests/bits/conversions.h |  25 +-
 .../experimental/simd/tests/bits/main.h   |  87 +
 .../experimental/simd/tests/bits/make_vec.h   |  10 +
 .../simd/tests/bits/mathreference.h   |   3 +
 .../simd/tests/bits/test_values.h |   6 +
 .../experimental/simd/tests/bits/verify.h |  66 +---
 .../experimental/simd/tests/broadcast.cc  |   3 +-
 .../experimental/simd/tests/casts.cc  |   4 +-
 .../experimental/simd/tests/fpclassify.cc |   4 +-
 .../experimental/simd/tests/frexp.cc  |   4 +-
 .../experimental/simd/tests/generator.cc  |   3 +-
 .../experimental/simd/tests/hypot3_fma.cc |   4 +-
 .../simd/tests/integer_operators.cc   |   5 +-
 .../simd/tests/ldexp_scalbn_scalbln_modf.cc   |   4 +-
 .../experimental/simd/tests/loadstore.cc  |   4 +-
 .../experimental/simd/tests/logarithm.cc  |   5 +-
 .../experimental/simd/tests/mask_broadcast.cc |   3 +-
 .../simd/tests/mask_conversions.cc|   2 +-
 .../simd/tests/mask_implicit_cvt.cc   |   3 +-
 .../experimental/simd/tests/mask_loadstore.cc |  29 +-
 .../simd/tests/mask_operator_cvt.cc   |   3 +-
 .../experimental/simd/tests/mask_operators.cc |   3 +-
 .../simd/tests/mask_reductions.cc |  30 +-
 .../experimental/simd/tests/math_1arg.cc  |   3 +-
 .../experimental/simd/tests/math_2arg.cc  |   4 +-
 .../experimental/simd/tests/operator_cvt.cc   |   3 +-
 .../experimental/simd/tests/operators.cc  |  14 +-
 .../experimental/simd/tests/reductions.cc |   4 +-
 .../experimental/simd/tests/remqo.cc  |   4 +-
 .../testsuite/experimental/simd/tests/simd.cc |   2 +-
 .../experimental/simd/tests/sincos.cc |   6 +-
 .../experimental/simd/tests/split_concat.cc   |   4 +-
 .../experimental/simd/tests/splits.cc |   2 +-
 .../experimental/simd/tests/trigonometric.cc  |   4 +-
 .../simd/tests/trunc_ceil_floor.cc|   3 +-
 .../experimental/simd/tests/where.cc  |   4 +-
 48 files changed, 772 insertions(+), 735 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/experimental/simd/tests/bits/
main.h

-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──






[PATCH 1/7] libstdc++: Ensure __builtin_constant_p isn't lost on the way

2023-02-15 Thread Matthias Kretz via Gcc-patches


The more expensive code path should only be taken if it can be optimized
away.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd.h
(_SimdWrapper::_M_is_constprop_none_of)
(_SimdWrapper::_M_is_constprop_all_of): Return false unless the
computed result still satisfies __builtin_constant_p.
---
 libstdc++-v3/include/experimental/bits/simd.h | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index e76f4781fa6..3de966bbf22 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -2673,7 +2673,8 @@ template 
 	  else
 	__execute_n_times<_Width>(
 	  [&](auto __i) { __r &= _M_data[__i.value] == _Tp(); });
-	  return __r;
+	  if (__builtin_constant_p(__r))
+	return __r;
 	}
   return false;
 }
@@ -2693,7 +2694,8 @@ template 
 	  else
 	__execute_n_times<_Width>(
 	  [&](auto __i) { __r &= _M_data[__i.value] == ~_Tp(); });
-	  return __r;
+	  if (__builtin_constant_p(__r))
+	return __r;
 	}
   return false;
 }


[PATCH 3/7] libstdc++: Document timeout and timeout-factor of simd tests

2023-02-15 Thread Matthias Kretz via Gcc-patches


Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* testsuite/experimental/simd/README.md: Document the timeout
and timeout-factor directives. Minor typo fixed.
---
 libstdc++-v3/testsuite/experimental/simd/README.md | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/testsuite/experimental/simd/README.md b/libstdc++-v3/testsuite/experimental/simd/README.md
index b82453df403..ef8b7c33de7 100644
--- a/libstdc++-v3/testsuite/experimental/simd/README.md
+++ b/libstdc++-v3/testsuite/experimental/simd/README.md
@@ -139,7 +139,13 @@ allowed_distance)` macros.
   test then shows as "XFAIL: ...". If the test passes, the test shows "XPASS: 
   ...".
 
-All patterns are matched via
+* `timeout: <number>`
+  Set the timeout of this test to `<number>` seconds.
+
+* `timeout-factor: <number>`
+  Multiply the default timeout with `<number>`.
+
+All patterns except `timeout` and `timeout-factor` are matched via
 ```sh
 case '' in
   )
@@ -147,7 +153,7 @@ case '' in
   ;;
 esac
 ```
-The `` is implicitly adds a `*` wildcard before and after the 
+The `` implicitly adds a `*` wildcard before and after the 
 pattern. Thus, the `CXXFLAGS` pattern matches a substring and all other 
 patterns require a full match.
 


[PATCH 7/7] libstdc++: Fix incorrect function call in -ffast-math optimization

2023-02-15 Thread Matthias Kretz via Gcc-patches


Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_math.h (__hypot): Bitcasting
between scalars requires the __bit_cast helper function instead
of simd_bit_cast.
---
 libstdc++-v3/include/experimental/bits/simd_math.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd_math.h b/libstdc++-v3/include/experimental/bits/simd_math.h
index c20315e4e30..c91f05fceb3 100644
--- a/libstdc++-v3/include/experimental/bits/simd_math.h
+++ b/libstdc++-v3/include/experimental/bits/simd_math.h
@@ -1010,7 +1010,7 @@ template 
 	using _IV = rebind_simd_t<_Ip, _V>;
 	const auto __as_int = simd_bit_cast<_IV>(__hi_exp);
 	const _V __scale
-	  = simd_bit_cast<_V>(2 * simd_bit_cast<_Ip>(_Tp(1)) - __as_int);
+	  = simd_bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
 #else
 	const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
 #endif
@@ -1181,7 +1181,7 @@ _GLIBCXX_SIMD_CVTING2(hypot)
 		using _IV = rebind_simd_t<_Ip, _V>;
 		const auto __as_int = simd_bit_cast<_IV>(__hi_exp);
 		const _V __scale
-		  = simd_bit_cast<_V>(2 * simd_bit_cast<_Ip>(_Tp(1)) - __as_int);
+		  = simd_bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
 #else
 		const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
 #endif


[PATCH 2/7] libstdc++: Annotate most lambdas with always_inline

2023-02-15 Thread Matthias Kretz via Gcc-patches


All of the annotated lambdas are simply a necessary means for
implementing these functions and should never result in an actual
function call. Many of these lambdas would go away if C++ had better
language support for packs.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/108030
* include/experimental/bits/simd_detail.h: Define
_GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA.
* include/experimental/bits/simd.h: Annotate lambdas with
_GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA.
* include/experimental/bits/simd_builtin.h: Ditto.
* include/experimental/bits/simd_converter.h: Ditto.
* include/experimental/bits/simd_fixed_size.h: Ditto.
* include/experimental/bits/simd_math.h: Ditto.
* include/experimental/bits/simd_neon.h: Ditto.
* include/experimental/bits/simd_x86.h: Ditto.
---
 libstdc++-v3/include/experimental/bits/simd.h | 239 ++--
 .../include/experimental/bits/simd_builtin.h  | 351 ++
 .../experimental/bits/simd_converter.h|  22 +-
 .../include/experimental/bits/simd_detail.h   |   3 +
 .../experimental/bits/simd_fixed_size.h   | 265 ++---
 .../include/experimental/bits/simd_math.h |  52 +--
 .../include/experimental/bits/simd_neon.h |  14 +-
 .../include/experimental/bits/simd_x86.h  | 122 +++---
 8 files changed, 575 insertions(+), 493 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index 3de966bbf22..ffe72fa6ccf 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -609,28 +609,34 @@ template 
 	  operator&(_Ip __rhs) const
 	  {
 	return __generate_from_n_evaluations<_Np, _Ip>(
-	  [&](auto __i) { return __rhs._M_data[__i] & _M_data[__i]; });
+	  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+		return __rhs._M_data[__i] & _M_data[__i];
+	  });
 	  }
 
 	  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
 	  operator|(_Ip __rhs) const
 	  {
 	return __generate_from_n_evaluations<_Np, _Ip>(
-	  [&](auto __i) { return __rhs._M_data[__i] | _M_data[__i]; });
+	  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+		return __rhs._M_data[__i] | _M_data[__i];
+	  });
 	  }
 
 	  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
 	  operator^(_Ip __rhs) const
 	  {
 	return __generate_from_n_evaluations<_Np, _Ip>(
-	  [&](auto __i) { return __rhs._M_data[__i] ^ _M_data[__i]; });
+	  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+		return __rhs._M_data[__i] ^ _M_data[__i];
+	  });
 	  }
 
 	  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
 	  operator~() const
 	  {
 	return __generate_from_n_evaluations<_Np, _Ip>(
-	  [&](auto __i) { return ~_M_data[__i]; });
+	  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return ~_M_data[__i]; });
 	  }
 	};
 	return _Ip{};
@@ -1391,7 +1397,7 @@ template 
 operator^=(const _BitMask& __b) & noexcept
 {
   __execute_n_times<_S_array_size>(
-	[&](auto __i) { _M_bits[__i] ^= __b._M_bits[__i]; });
+	[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] ^= __b._M_bits[__i]; });
   return *this;
 }
 
@@ -1399,7 +1405,7 @@ template 
 operator|=(const _BitMask& __b) & noexcept
 {
   __execute_n_times<_S_array_size>(
-	[&](auto __i) { _M_bits[__i] |= __b._M_bits[__i]; });
+	[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] |= __b._M_bits[__i]; });
   return *this;
 }
 
@@ -1407,7 +1413,7 @@ template 
 operator&=(const _BitMask& __b) & noexcept
 {
   __execute_n_times<_S_array_size>(
-	[&](auto __i) { _M_bits[__i] &= __b._M_bits[__i]; });
+	[&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] &= __b._M_bits[__i]; });
   return *this;
 }
 
@@ -1797,8 +1803,9 @@ template 
   __vector_broadcast(_Tp __x)
   {
 return __call_with_n_evaluations<_Np>(
-  [](auto... __xx) { return __vector_type_t<_Tp, _Np>{__xx...}; },
-  [&__x](int) { return __x; });
+  [](auto... __xx) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
+	return __vector_type_t<_Tp, _Np>{__xx...};
+  }, [&__x](int) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x; });
   }
 
 // }}}
@@ -2205,7 +2212,7 @@ template (
-	  __x, [](auto... __entries) {
+	  __x, [](auto... __entries) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
 	return reinterpret_cast<_R>(_Up{__entries...});
 	  });
   }
@@ -2607,7 +2614,7 @@ template 
 
 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper(initializer_list<_Tp> __init)
   : _Base(__generate_from_n_evaluations<_Width, _BuiltinType>(
-	[&](auto __i) { return __init.begin

[PATCH 5/7] libstdc++: printf format string fix in testsuite

2023-02-15 Thread Matthias Kretz via Gcc-patches


Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* testsuite/experimental/simd/tests/bits/verify.h
(verify::verify): Use %zx for size_t in format string.
---
 libstdc++-v3/testsuite/experimental/simd/tests/bits/verify.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/testsuite/experimental/simd/tests/bits/verify.h b/libstdc++-v3/testsuite/experimental/simd/tests/bits/verify.h
index 2ab3ad3fa8c..01ad50bd01a 100644
--- a/libstdc++-v3/testsuite/experimental/simd/tests/bits/verify.h
+++ b/libstdc++-v3/testsuite/experimental/simd/tests/bits/verify.h
@@ -137,7 +137,7 @@ public:
 {
   if (m_failed)
 	[&] {
-	  __builtin_fprintf(stderr, "%s:%d: (%s):\nInstruction Pointer: %x\n"
+	  __builtin_fprintf(stderr, "%s:%d: (%s):\nInstruction Pointer: %zx\n"
 "Assertion '%s' failed.\n",
 			file, line, func, m_ip, cond);
 	  (print(extra_info, int()), ...);


[PATCH 4/7] libstdc++: Use a PCH to speed up check-simd

2023-02-15 Thread Matthias Kretz via Gcc-patches

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* testsuite/experimental/simd/generate_makefile.sh: Generate and
pre-compile pch.h, which includes all headers that do not depend
on command-line macros.
* testsuite/experimental/simd/tests/bits/conversions.h: Add
include guard.
(genHalfBits): Simplify.
* testsuite/experimental/simd/tests/bits/make_vec.h: Add include
guard.
(make_alternating_mask): Moved from mask_loadstore.
* testsuite/experimental/simd/tests/bits/mathreference.h: Add
include guard.
* testsuite/experimental/simd/tests/bits/test_values.h: Ditto.
* testsuite/experimental/simd/tests/mask_loadstore.cc
(make_mask, make_alternating_mask): Removed.
* testsuite/experimental/simd/tests/mask_reductions.cc: Ditto.
* testsuite/experimental/simd/tests/operators.cc (genHalfBits):
Removed.
* testsuite/experimental/simd/tests/abs.cc: Only include
bits/main.h.
* testsuite/experimental/simd/tests/algorithms.cc: Ditto.
* testsuite/experimental/simd/tests/broadcast.cc: Ditto.
* testsuite/experimental/simd/tests/casts.cc: Ditto.
* testsuite/experimental/simd/tests/fpclassify.cc: Ditto.
* testsuite/experimental/simd/tests/frexp.cc: Ditto.
* testsuite/experimental/simd/tests/generator.cc: Ditto.
* testsuite/experimental/simd/tests/hypot3_fma.cc: Ditto.
* testsuite/experimental/simd/tests/integer_operators.cc: Ditto.
* testsuite/experimental/simd/tests/ldexp_scalbn_scalbln_modf.cc:
Ditto.
* testsuite/experimental/simd/tests/loadstore.cc: Ditto.
* testsuite/experimental/simd/tests/logarithm.cc: Ditto.
* testsuite/experimental/simd/tests/mask_broadcast.cc: Ditto.
* testsuite/experimental/simd/tests/mask_implicit_cvt.cc: Ditto.
* testsuite/experimental/simd/tests/mask_operator_cvt.cc: Ditto.
* testsuite/experimental/simd/tests/mask_operators.cc: Ditto.
* testsuite/experimental/simd/tests/math_1arg.cc: Ditto.
* testsuite/experimental/simd/tests/math_2arg.cc: Ditto.
* testsuite/experimental/simd/tests/operator_cvt.cc: Ditto.
* testsuite/experimental/simd/tests/reductions.cc: Ditto.
* testsuite/experimental/simd/tests/remqo.cc: Ditto.
* testsuite/experimental/simd/tests/sincos.cc: Ditto.
* testsuite/experimental/simd/tests/split_concat.cc: Ditto.
* testsuite/experimental/simd/tests/trigonometric.cc: Ditto.
* testsuite/experimental/simd/tests/trunc_ceil_floor.cc: Ditto.
* testsuite/experimental/simd/tests/where.cc: Ditto.
---
 .../experimental/simd/generate_makefile.sh| 24 -
 .../testsuite/experimental/simd/tests/abs.cc  |  4 +-
 .../experimental/simd/tests/algorithms.cc |  3 +-
 .../simd/tests/bits/conversions.h | 25 ++
 .../experimental/simd/tests/bits/main.h   | 87 +++
 .../experimental/simd/tests/bits/make_vec.h   | 10 +++
 .../simd/tests/bits/mathreference.h   |  3 +
 .../simd/tests/bits/test_values.h |  6 ++
 .../experimental/simd/tests/bits/verify.h | 64 --
 .../experimental/simd/tests/broadcast.cc  |  3 +-
 .../experimental/simd/tests/casts.cc  |  4 +-
 .../experimental/simd/tests/fpclassify.cc |  4 +-
 .../experimental/simd/tests/frexp.cc  |  4 +-
 .../experimental/simd/tests/generator.cc  |  3 +-
 .../experimental/simd/tests/hypot3_fma.cc |  4 +-
 .../simd/tests/integer_operators.cc   |  5 +-
 .../simd/tests/ldexp_scalbn_scalbln_modf.cc   |  4 +-
 .../experimental/simd/tests/loadstore.cc  |  4 +-
 .../experimental/simd/tests/logarithm.cc  |  5 +-
 .../experimental/simd/tests/mask_broadcast.cc |  3 +-
 .../simd/tests/mask_conversions.cc|  2 +-
 .../simd/tests/mask_implicit_cvt.cc   |  3 +-
 .../experimental/simd/tests/mask_loadstore.cc | 29 +--
 .../simd/tests/mask_operator_cvt.cc   |  3 +-
 .../experimental/simd/tests/mask_operators.cc |  3 +-
 .../simd/tests/mask_reductions.cc | 30 +--
 .../experimental/simd/tests/math_1arg.cc  |  3 +-
 .../experimental/simd/tests/math_2arg.cc  |  4 +-
 .../experimental/simd/tests/operator_cvt.cc   |  3 +-
 .../experimental/simd/tests/operators.cc  | 14 +--
 .../experimental/simd/tests/reductions.cc |  4 +-
 .../experimental/simd/tests/remqo.cc  |  4 +-
 .../testsuite/experimental/simd/tests/simd.cc |  2 +-
 .../experimental/simd/tests/sincos.cc |  6 +-
 .../experimental/simd/tests/split_concat.cc   |  4 +-
 .../experimental/simd/tests/splits.cc |  2 +-
 .../experimental/simd/tests/trigonometric.cc  |  4 +-
 .../simd/tests/trunc_ceil_floor.cc|  3 +-
 .../experimental/simd/tests/where.cc  |  4 +-
 39 files changed, 170 insertions(+), 226 deletions(-)
 create mode 100644 libstdc++-

[PATCH 6/7] libstdc++: Fix incorrect __builtin_is_constant_evaluated calls

2023-02-15 Thread Matthias Kretz via Gcc-patches


Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_x86.h
(_SimdImplX86::_S_not_equal_to, _SimdImplX86::_S_less)
(_SimdImplX86::_S_less_equal): Do not call
__builtin_is_constant_evaluated in constexpr-if.
---
 .../include/experimental/bits/simd_x86.h  | 21 +++
 1 file changed, 12 insertions(+), 9 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 60e80d394ba..dcfdc2a9496 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -2344,15 +2344,16 @@ template 
 	else
 	  __assert_unreachable<_Tp>();
 	  }   // }}}
-	else if constexpr (!__builtin_is_constant_evaluated() // {{{
-			   && sizeof(__x) == 8)
+	else if (__builtin_is_constant_evaluated())
+	  return _Base::_S_not_equal_to(__x, __y);
+	else if constexpr (sizeof(__x) == 8)
 	  {
 	const auto __r128 = __vector_bitcast<_Tp, 16 / sizeof(_Tp)>(__x)
 != __vector_bitcast<_Tp, 16 / sizeof(_Tp)>(__y);
 	_MaskMember<_Tp> __r64;
 	__builtin_memcpy(&__r64._M_data, &__r128, sizeof(__r64));
 	return __r64;
-	  } // }}}
+	  }
 	else
 	  return _Base::_S_not_equal_to(__x, __y);
   }
@@ -2451,15 +2452,16 @@ template 
 	else
 	  __assert_unreachable<_Tp>();
 	  }   // }}}
-	else if constexpr (!__builtin_is_constant_evaluated() // {{{
-			   && sizeof(__x) == 8)
+	else if (__builtin_is_constant_evaluated())
+	  return _Base::_S_less(__x, __y);
+	else if constexpr (sizeof(__x) == 8)
 	  {
 	const auto __r128 = __vector_bitcast<_Tp, 16 / sizeof(_Tp)>(__x)
 < __vector_bitcast<_Tp, 16 / sizeof(_Tp)>(__y);
 	_MaskMember<_Tp> __r64;
 	__builtin_memcpy(&__r64._M_data, &__r128, sizeof(__r64));
 	return __r64;
-	  } // }}}
+	  }
 	else
 	  return _Base::_S_less(__x, __y);
   }
@@ -2558,15 +2560,16 @@ template 
 	else
 	  __assert_unreachable<_Tp>();
 	  }   // }}}
-	else if constexpr (!__builtin_is_constant_evaluated() // {{{
-			   && sizeof(__x) == 8)
+	else if (__builtin_is_constant_evaluated())
+	  return _Base::_S_less_equal(__x, __y);
+	else if constexpr (sizeof(__x) == 8)
 	  {
 	const auto __r128 = __vector_bitcast<_Tp, 16 / sizeof(_Tp)>(__x)
 <= __vector_bitcast<_Tp, 16 / sizeof(_Tp)>(__y);
 	_MaskMember<_Tp> __r64;
 	__builtin_memcpy(&__r64._M_data, &__r128, sizeof(__r64));
 	return __r64;
-	  } // }}}
+	  }
 	else
 	  return _Base::_S_less_equal(__x, __y);
   }


[committed] libstdc++: Fix uses of non-reserved names in simd header

2023-02-20 Thread Matthias Kretz via Gcc-patches
Tested x86_64-pc-linux. Pushed to trunk.

-- >8 --

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd.h (__extract_part, split):
Use reserved name for template parameter.
---
 libstdc++-v3/include/experimental/bits/simd.h | 22 +--
 1 file changed, 11 insertions(+), 11 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index ffe72fa6ccf..2f615d13b73 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -3783,7 +3783,7 @@ template 
   _SimdWrapper<_Tp, _Np / _Total * _Combine>
   __extract_part(const _SimdWrapper<_Tp, _Np> __x);
 
-template 
   _GLIBCXX_SIMD_INTRINSIC auto
   __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x);
@@ -3896,19 +3896,19 @@ template 
 
 // split(simd) {{{
 template  / _V::size()>
-  enable_if_t == Parts * _V::size()
-	  && is_simd_v<_V>, array<_V, Parts>>
+	  size_t _Parts = simd_size_v / _V::size()>
+  enable_if_t == _Parts * _V::size()
+		&& is_simd_v<_V>, array<_V, _Parts>>
   split(const simd& __x)
   {
 using _Tp = typename _V::value_type;
-if constexpr (Parts == 1)
+if constexpr (_Parts == 1)
   {
 	return {simd_cast<_V>(__x)};
   }
 else if (__x._M_is_constprop())
   {
-	return __generate_from_n_evaluations>(
+	return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
 		 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
 		   return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
 			 { return __x[__i * _V::size() + __j]; });
@@ -3925,12 +3925,12 @@ template * const __element_ptr
 	= reinterpret_cast*>(&__data(__x));
-  return __generate_from_n_evaluations>(
+  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
 	   [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
 	   { return _V(__element_ptr + __i * _V::size(), vector_aligned); });
 #else
   const auto& __xx = __data(__x);
-  return __generate_from_n_evaluations>(
+  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
 	   [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
 		 [[maybe_unused]] constexpr size_t __offset
 		   = decltype(__i)::value * _V::size();
@@ -3944,12 +3944,12 @@ template )
 {
   // normally memcpy should work here as well
-  return __generate_from_n_evaluations>(
+  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
 	   [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
 }
   else
 {
-  return __generate_from_n_evaluations>(
+  return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
 	   [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
 		 if constexpr (__is_fixed_size_abi_v)
 		   return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
@@ -3957,7 +3957,7 @@ template (__data(__x)));
+			 __extract_part(__data(__x)));
 	   });
 }
   }


[PATCH 8/8] libstdc++: Test that integral simd reductions are precise

2023-02-23 Thread Matthias Kretz via Gcc-patches


Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* testsuite/experimental/simd/tests/reductions.cc: Introduce
max_distance as the type-dependent max error.
---
 libstdc++-v3/testsuite/experimental/simd/tests/reductions.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/testsuite/experimental/simd/tests/reductions.cc b/libstdc++-v3/testsuite/experimental/simd/tests/reductions.cc
index 0c4c79feb20..fed164314d7 100644
--- a/libstdc++-v3/testsuite/experimental/simd/tests/reductions.cc
+++ b/libstdc++-v3/testsuite/experimental/simd/tests/reductions.cc
@@ -112,6 +112,7 @@ template 
   T acc = x[0];
   for (size_t i = 1; i < V::size(); ++i)
 	acc += x[i];
-  ULP_COMPARE(reduce(x), acc, V::size() / 2).on_failure("x = ", x);
+  const T max_distance = std::is_integral_v ? 0 : V::size() / 2;
+  ULP_COMPARE(reduce(x), acc, max_distance).on_failure("x = ", x);
 });
   }


[PATCH 4/8] libstdc++: Add missing constexpr on simd shift implementation

2023-02-23 Thread Matthias Kretz via Gcc-patches


Resolves -Wtautological-compare warnings about `if
(__builtin_is_constant_evaluated())` in the implementations of these
functions.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_x86.h (_S_bit_shift_left)
(_S_bit_shift_right): Declare constexpr. The implementation was
already expecting constexpr evaluation.
---
 libstdc++-v3/include/experimental/bits/simd_x86.h | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 897a67829d1..8872ca301b9 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -1526,7 +1526,7 @@ _S_modulus(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
 // values.
   #ifndef _GLIBCXX_SIMD_NO_SHIFT_OPT
 template >
-  inline _GLIBCXX_CONST static typename _TVT::type
+  constexpr inline _GLIBCXX_CONST static typename _TVT::type
   _S_bit_shift_left(_Tp __xx, int __y)
   {
 	using _V = typename _TVT::type;
@@ -1631,7 +1631,7 @@ unsigned(
   }
 
 template >
-  inline _GLIBCXX_CONST static typename _TVT::type
+  constexpr inline _GLIBCXX_CONST static typename _TVT::type
   _S_bit_shift_left(_Tp __xx, typename _TVT::type __y)
   {
 	using _V = typename _TVT::type;
@@ -1800,7 +1800,7 @@ _mm512_cvtepi16_epi8(
 // _S_bit_shift_right {{{
 #ifndef _GLIBCXX_SIMD_NO_SHIFT_OPT
 template >
-  inline _GLIBCXX_CONST static typename _TVT::type
+  constexpr inline _GLIBCXX_CONST static typename _TVT::type
   _S_bit_shift_right(_Tp __xx, int __y)
   {
 	using _V = typename _TVT::type;
@@ -1850,7 +1850,7 @@ _S_bit_shift_right(_Tp __xx, int __y)
   }
 
 template >
-  inline _GLIBCXX_CONST static typename _TVT::type
+  constexpr inline _GLIBCXX_CONST static typename _TVT::type
   _S_bit_shift_right(_Tp __xx, typename _TVT::type __y)
   {
 	using _V = typename _TVT::type;


[PATCH 6/8] libstdc++: Fix formatting

2023-02-23 Thread Matthias Kretz via Gcc-patches


Whitespace changes only.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd.h: Line breaks and indenting
fixed to follow the libstdc++ standard.
* include/experimental/bits/simd_builtin.h: Likewise.
* include/experimental/bits/simd_fixed_size.h: Likewise.
* include/experimental/bits/simd_neon.h: Likewise.
* include/experimental/bits/simd_ppc.h: Likewise.
* include/experimental/bits/simd_scalar.h: Likewise.
* include/experimental/bits/simd_x86.h: Likewise.
---
 libstdc++-v3/include/experimental/bits/simd.h | 473 ++--
 .../include/experimental/bits/simd_builtin.h  | 692 +-
 .../experimental/bits/simd_fixed_size.h   | 228 +++---
 .../include/experimental/bits/simd_neon.h |  24 +-
 .../include/experimental/bits/simd_ppc.h  |   3 +-
 .../include/experimental/bits/simd_scalar.h   | 362 +
 .../include/experimental/bits/simd_x86.h  |  90 ++-
 7 files changed, 942 insertions(+), 930 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index 7482d109291..fb661c9657f 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -180,10 +180,7 @@ struct vector_aligned_tag
   template 
 _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
 _S_apply(_Up* __ptr)
-{
-  return static_cast<_Up*>(
-	__builtin_assume_aligned(__ptr, _S_alignment<_Tp, _Up>));
-}
+{ return static_cast<_Up*>( __builtin_assume_aligned(__ptr, _S_alignment<_Tp, _Up>)); }
 };
 
 template  struct overaligned_tag
@@ -288,13 +285,15 @@ namespace __detail
   // expression. math_errhandling may expand to an extern symbol, in which case a constexpr value
   // must be guessed.
   template 
-constexpr bool __handle_fpexcept_impl(int)
+constexpr bool
+__handle_fpexcept_impl(int)
 { return math_errhandling & MATH_ERREXCEPT; }
 #endif
 
   // Fallback if math_errhandling doesn't work: with fast-math assume floating-point exceptions are
   // ignored, otherwise implement correct exception behavior.
-  constexpr bool __handle_fpexcept_impl(float)
+  constexpr bool
+  __handle_fpexcept_impl(float)
   {
 #if defined __FAST_MATH__
 return false;
@@ -749,8 +748,7 @@ struct __make_dependent
 // __invoke_ub{{{
 template 
   [[noreturn]] _GLIBCXX_SIMD_ALWAYS_INLINE void
-  __invoke_ub([[maybe_unused]] const char* __msg,
-	  [[maybe_unused]] const _Args&... __args)
+  __invoke_ub([[maybe_unused]] const char* __msg, [[maybe_unused]] const _Args&... __args)
   {
 #ifdef _GLIBCXX_DEBUG_UB
 __builtin_fprintf(stderr, __msg, __args...);
@@ -795,11 +793,14 @@ class _ExactBool
   const bool _M_data;
 
 public:
-  _GLIBCXX_SIMD_INTRINSIC constexpr _ExactBool(bool __b) : _M_data(__b) {}
+  _GLIBCXX_SIMD_INTRINSIC constexpr
+  _ExactBool(bool __b) : _M_data(__b) {}
 
   _ExactBool(int) = delete;
 
-  _GLIBCXX_SIMD_INTRINSIC constexpr operator bool() const { return _M_data; }
+  _GLIBCXX_SIMD_INTRINSIC constexpr
+  operator bool() const
+  { return _M_data; }
 };
 
 // }}}
@@ -1488,8 +1489,7 @@ struct __vector_type_n<_Tp, 1, enable_if_t<__is_vectorizable_v<_Tp>>>
 
 // else, use GNU-style builtin vector types
 template 
-  struct __vector_type_n<_Tp, _Np,
-			 enable_if_t<__is_vectorizable_v<_Tp> && _Np >= 2>>
+  struct __vector_type_n<_Tp, _Np, enable_if_t<__is_vectorizable_v<_Tp> && _Np >= 2>>
   {
 static constexpr size_t _S_Np2 = std::__bit_ceil(_Np * sizeof(_Tp));
 
@@ -1770,8 +1770,7 @@ __bit_cast(const _From __x)
 // }}}
 // __to_intrin {{{
 template ,
-	  typename _R
-	  = __intrinsic_type_t>
+	  typename _R = __intrinsic_type_t>
   _GLIBCXX_SIMD_INTRINSIC constexpr _R
   __to_intrin(_Tp __x)
   {
@@ -1792,9 +1791,7 @@ __to_intrin(_Tp __x)
 template 
   _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, sizeof...(_Args)>
   __make_vector(const _Args&... __args)
-  {
-return __vector_type_t<_Tp, sizeof...(_Args)>{static_cast<_Tp>(__args)...};
-  }
+  { return __vector_type_t<_Tp, sizeof...(_Args)>{static_cast<_Tp>(__args)...}; }
 
 // }}}
 // __vector_broadcast{{{
@@ -1813,10 +1810,7 @@ __vector_broadcast(_Tp __x)
   template 
   _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
   __generate_vector_impl(_Gp&& __gen, index_sequence<_I...>)
-  {
-return __vector_type_t<_Tp, _Np>{
-  static_cast<_Tp>(__gen(_SizeConstant<_I>()))...};
-  }
+  { return __vector_type_t<_Tp, _Np>{ static_cast<_Tp>(__gen(_SizeConstant<_I>()))...}; }
 
 template , typename _Gp>
   _GLIBCXX_SIMD_INTRINSIC constexpr _V
@@ -2029,8 +2023,7 @@ __not(_Tp __

[PATCH 7/8] libstdc++: Fix -Wsign-compare issue

2023-02-23 Thread Matthias Kretz via Gcc-patches


Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_builtin.h (_S_set): Compare as
int. The actual range of these indexes is very small.
---
 libstdc++-v3/include/experimental/bits/simd_builtin.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index 0e75f941288..30bbfa7d478 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -2875,7 +2875,7 @@ _S_bit_xor(const _SimdWrapper<_Tp, _Np>& __x, const _SimdWrapper<_Tp, _Np>& __y)
 		__k = __generate_from_n_evaluations<_Np,
 		__vector_type_t<_Tp, _Np>>(
 		  [&](auto __j) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
-		if (__i == __j)
+		if (__i == static_cast(__j))
 		  return _Tp(-__x);
 		else
 		  return __k[+__j];


[PATCH 2/8] libstdc++: Fix simd build failure on clang

2023-02-23 Thread Matthias Kretz via Gcc-patches


Clang does not support __attribute__ on lambdas. Therefore, only set
_GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA if __clang__ is not defined.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/108030
* include/experimental/bits/simd_detail.h
(_GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA): Define as empty for
__clang__.
---
 libstdc++-v3/include/experimental/bits/simd_detail.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd_detail.h b/libstdc++-v3/include/experimental/bits/simd_detail.h
index a0ad10efe0f..30cc1ef0eef 100644
--- a/libstdc++-v3/include/experimental/bits/simd_detail.h
+++ b/libstdc++-v3/include/experimental/bits/simd_detail.h
@@ -254,15 +254,16 @@ namespace experimental
 
 #ifdef __clang__
 #define _GLIBCXX_SIMD_NORMAL_MATH
+#define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
 #else
 #define _GLIBCXX_SIMD_NORMAL_MATH  \
   [[__gnu__::__optimize__("finite-math-only,no-signed-zeros")]]
+#define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA __attribute__((__always_inline__))
 #endif
 #define _GLIBCXX_SIMD_NEVER_INLINE [[__gnu__::__noinline__]]
 #define _GLIBCXX_SIMD_INTRINSIC\
   [[__gnu__::__always_inline__, __gnu__::__artificial__]] inline
 #define _GLIBCXX_SIMD_ALWAYS_INLINE [[__gnu__::__always_inline__]] inline
-#define _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA __attribute__((__always_inline__))
 #define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0)
 #define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1)
 


[PATCH 5/8] libstdc++: Always-inline most of non-cmath fixed_size implementation

2023-02-23 Thread Matthias Kretz via Gcc-patches


For simd, the inlining behavior should be similar to builtin types. (No
operator on builtin types is ever translated into a function call.)
Therefore, always_inline is the right choice (i.e. inline on -O0 as
well).

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/108030
* include/experimental/bits/simd_fixed_size.h
(_SimdImplFixedSize::_S_broadcast): Replace inline with
_GLIBCXX_SIMD_INTRINSIC.
(_SimdImplFixedSize::_S_generate): Likewise.
(_SimdImplFixedSize::_S_load): Likewise.
(_SimdImplFixedSize::_S_masked_load): Likewise.
(_SimdImplFixedSize::_S_store): Likewise.
(_SimdImplFixedSize::_S_masked_store): Likewise.
(_SimdImplFixedSize::_S_min): Likewise.
(_SimdImplFixedSize::_S_max): Likewise.
(_SimdImplFixedSize::_S_complement): Likewise.
(_SimdImplFixedSize::_S_unary_minus): Likewise.
(_SimdImplFixedSize::_S_plus): Likewise.
(_SimdImplFixedSize::_S_minus): Likewise.
(_SimdImplFixedSize::_S_multiplies): Likewise.
(_SimdImplFixedSize::_S_divides): Likewise.
(_SimdImplFixedSize::_S_modulus): Likewise.
(_SimdImplFixedSize::_S_bit_and): Likewise.
(_SimdImplFixedSize::_S_bit_or): Likewise.
(_SimdImplFixedSize::_S_bit_xor): Likewise.
(_SimdImplFixedSize::_S_bit_shift_left): Likewise.
(_SimdImplFixedSize::_S_bit_shift_right): Likewise.
(_SimdImplFixedSize::_S_remquo): Add inline keyword (to be
explicit about not always-inline, yet).
(_SimdImplFixedSize::_S_isinf): Likewise.
(_SimdImplFixedSize::_S_isfinite): Likewise.
(_SimdImplFixedSize::_S_isnan): Likewise.
(_SimdImplFixedSize::_S_isnormal): Likewise.
(_SimdImplFixedSize::_S_signbit): Likewise.
---
 .../experimental/bits/simd_fixed_size.h   | 60 +--
 1 file changed, 30 insertions(+), 30 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd_fixed_size.h b/libstdc++-v3/include/experimental/bits/simd_fixed_size.h
index 3ac6eaa3f6b..88a9b27e359 100644
--- a/libstdc++-v3/include/experimental/bits/simd_fixed_size.h
+++ b/libstdc++-v3/include/experimental/bits/simd_fixed_size.h
@@ -1284,7 +1284,8 @@ struct _SimdImplFixedSize
 
 // broadcast {{{2
 template 
-  static constexpr inline _SimdMember<_Tp> _S_broadcast(_Tp __x) noexcept
+  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp>
+  _S_broadcast(_Tp __x) noexcept
   {
 	return _SimdMember<_Tp>::_S_generate(
 		 [&](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
@@ -1294,8 +1295,8 @@ struct _SimdImplFixedSize
 
 // _S_generator {{{2
 template 
-  static constexpr inline _SimdMember<_Tp> _S_generator(_Fp&& __gen,
-			_TypeTag<_Tp>)
+  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp>
+  _S_generator(_Fp&& __gen, _TypeTag<_Tp>)
   {
 	return _SimdMember<_Tp>::_S_generate(
 		 [&__gen](auto __meta) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
@@ -1310,8 +1311,8 @@ struct _SimdImplFixedSize
 
 // _S_load {{{2
 template 
-  static inline _SimdMember<_Tp> _S_load(const _Up* __mem,
-	 _TypeTag<_Tp>) noexcept
+  _GLIBCXX_SIMD_INTRINSIC static _SimdMember<_Tp>
+  _S_load(const _Up* __mem, _TypeTag<_Tp>) noexcept
   {
 	return _SimdMember<_Tp>::_S_generate(
 		 [&](auto __meta) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
@@ -1321,7 +1322,7 @@ struct _SimdImplFixedSize
 
 // _S_masked_load {{{2
 template 
-  static inline _SimdTuple<_Tp, _As...>
+  _GLIBCXX_SIMD_INTRINSIC static _SimdTuple<_Tp, _As...>
   _S_masked_load(const _SimdTuple<_Tp, _As...>& __old,
 		 const _MaskMember __bits, const _Up* __mem) noexcept
   {
@@ -1344,8 +1345,8 @@ _S_masked_load(const _SimdTuple<_Tp, _As...>& __old,
 
 // _S_store {{{2
 template 
-  static inline void _S_store(const _SimdMember<_Tp>& __v, _Up* __mem,
-  _TypeTag<_Tp>) noexcept
+  _GLIBCXX_SIMD_INTRINSIC static void
+  _S_store(const _SimdMember<_Tp>& __v, _Up* __mem, _TypeTag<_Tp>) noexcept
   {
 	__for_each(__v, [&](auto __meta, auto __native) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
 	  __meta._S_store(__native, &__mem[__meta._S_offset], _TypeTag<_Tp>());
@@ -1354,9 +1355,9 @@ _S_masked_load(const _SimdTuple<_Tp, _As...>& __old,
 
 // _S_masked_store {{{2
 template 
-  static inline void _S_masked_store(const _SimdTuple<_Tp, _As...>& __v,
-	 _Up* __mem,
-	 const _MaskMember __bits) noexcept
+  _GLIBCXX_SIMD_INTRINSIC static void
+  _S_masked_store(const _SimdTuple<_Tp, _As...>& __v, _Up* __mem

[PATCH 3/8] libstdc++: More efficient masked inc-/decrement implementation

2023-02-23 Thread Matthias Kretz via Gcc-patches


Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/108856
* include/experimental/bits/simd_builtin.h
(_SimdImplBuiltin::_S_masked_unary): More efficient
implementation of masked inc-/decrement for integers and floats
without AVX2.
* include/experimental/bits/simd_x86.h
(_SimdImplX86::_S_masked_unary): New. Use AVX512 masked subtract
builtins for masked inc-/decrement.
---
 .../include/experimental/bits/simd_builtin.h  | 27 +++-
 .../include/experimental/bits/simd_x86.h  | 68 +++
 2 files changed, 93 insertions(+), 2 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index 792439a81bf..4a4de4534f3 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -2546,8 +2546,31 @@ _S_masked_unary(const _SimdWrapper<_K, _Np> __k,
 	_Op __op;
 	if (__k._M_is_constprop_all_of())
 	  return __data(__op(__vv));
-	else
-	  return _CommonImpl::_S_blend(__k, __v, __data(__op(__vv)));
+	else if constexpr (is_same_v<_Op, __increment>)
+	  {
+	static_assert(not std::is_same_v<_K, bool>);
+	if constexpr (is_integral_v<_Tp>)
+	  // Take a shortcut knowing that __k is an integer vector with values -1 or 0.
+	  return __v._M_data - __vector_bitcast<_Tp>(__k._M_data);
+	else if constexpr (not __have_avx2)
+	  return __v._M_data
+		   + __vector_bitcast<_Tp>(__k._M_data & __builtin_bit_cast(
+			   _K, _Tp(1)));
+	// starting with AVX2 it is more efficient to blend after add
+	  }
+	else if constexpr (is_same_v<_Op, __decrement>)
+	  {
+	static_assert(not std::is_same_v<_K, bool>);
+	if constexpr (is_integral_v<_Tp>)
+	  // Take a shortcut knowing that __k is an integer vector with values -1 or 0.
+	  return __v._M_data + __vector_bitcast<_Tp>(__k._M_data);
+	else if constexpr (not __have_avx2)
+	  return __v._M_data
+		   - __vector_bitcast<_Tp>(__k._M_data & __builtin_bit_cast(
+			   _K, _Tp(1)));
+	// starting with AVX2 it is more efficient to blend after sub
+	  }
+	return _CommonImpl::_S_blend(__k, __v, __data(__op(__vv)));
   }
 
 //}}}2
diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h
index dcfdc2a9496..897a67829d1 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -3462,6 +3462,74 @@ _S_islessgreater(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
   }
 
 //}}} }}}
+template  class _Op, typename _Tp, typename _K,
+	  size_t _Np>
+  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
+  _S_masked_unary(const _SimdWrapper<_K, _Np> __k,
+		  const _SimdWrapper<_Tp, _Np> __v)
+  {
+	if (__k._M_is_constprop_none_of())
+	  return __v;
+	else if (__k._M_is_constprop_all_of())
+	  {
+	auto __vv = _Base::_M_make_simd(__v);
+	_Op __op;
+	return __data(__op(__vv));
+	  }
+	else if constexpr (__is_bitmask_v
+			 && (is_same_v<_Op, __increment>
+   || is_same_v<_Op, __decrement>))
+	  {
+	// optimize masked unary increment and decrement as masked sub +/-1
+	constexpr int __pm_one
+	  = is_same_v<_Op, __increment> ? -1 : 1;
+	if constexpr (is_integral_v<_Tp>)
+	  {
+		constexpr bool __lp64 = sizeof(long) == sizeof(long long);
+		using _Ip = std::make_signed_t<_Tp>;
+		using _Up = std::conditional_t<
+			  std::is_same_v<_Ip, long>,
+			  std::conditional_t<__lp64, long long, int>,
+			  std::conditional_t<
+std::is_same_v<_Ip, signed char>, char, _Ip>>;
+		const auto __value = __vector_bitcast<_Up>(__v._M_data);
+#define _GLIBCXX_SIMD_MASK_SUB(_Sizeof, _Width, _Instr)\
+  if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width)   \
+return __vector_bitcast<_Tp>(__builtin_ia32_##_Instr##_mask(__value,   \
+	 __vector_broadcast<_Np>(_Up(__pm_one)), __value, __k._M_data))
+		_GLIBCXX_SIMD_MASK_SUB(1, 64, psubb512);
+		_GLIBCXX_SIMD_MASK_SUB(1, 32, psubb256);
+		_GLIBCXX_SIMD_MASK_SUB(1, 16, psubb128);
+		_GLIBCXX_SIMD_MASK_SUB(2, 64, psubw512);
+		_GLIBCXX_SIMD_MASK_SUB(2, 32, psubw256);
+		_GLIBCXX_SIMD_MASK_SUB(2, 16, psubw128);
+		_GLIBCXX_SIMD_MASK_SUB(4, 64, psubd512);
+		_GLIBCXX_SIMD_MASK_SUB(4, 32, psubd256);
+		_GLIBCXX_SIMD_MASK_SUB(4, 16, psubd128);
+		_GLIBCXX_SIMD_MASK_SUB(8, 64, psubq512);
+		_GLIBCXX_SIMD_MASK_SUB(8, 32, psubq256);
+		_GLIBCXX_SIMD_MASK_SUB(8, 16, psubq128);
+#undef _G

[PATCH 1/8] libstdc++: Simplify three helper functions into one

2023-02-23 Thread Matthias Kretz via Gcc-patches


Broadcast is a very common function. This should reduce compile-time
effort.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/108030
* include/experimental/bits/simd.h (__vector_broadcast):
Implement via __vector_broadcast_impl instead of
__call_with_n_evaluations + 2 lambdas.
(__vector_broadcast_impl): New.
---
 libstdc++-v3/include/experimental/bits/simd.h | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index 2f615d13b73..7482d109291 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -1798,15 +1798,15 @@ __to_intrin(_Tp __x)
 
 // }}}
 // __vector_broadcast{{{
+template 
+  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
+  __vector_broadcast_impl(_Tp __x, index_sequence<_I...>)
+  { return __vector_type_t<_Tp, _Np>{((void)_I, __x)...}; }
+
 template 
   _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
   __vector_broadcast(_Tp __x)
-  {
-return __call_with_n_evaluations<_Np>(
-  [](auto... __xx) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
-	return __vector_type_t<_Tp, _Np>{__xx...};
-  }, [&__x](int) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x; });
-  }
+  { return __vector_broadcast_impl<_Np, _Tp>(__x, make_index_sequence<_Np>()); }
 
 // }}}
 // __generate_vector{{{


[PATCH 0/8] std::experimental::simd patchset

2023-02-23 Thread Matthias Kretz via Gcc-patches
Tested on x86_64-pc-linux.

This patchset provides the final changes for PR108030 and resolves 
PR108856. The latter is a pure optimization and could wait for Stage 1 (I'm 
submitting the patch because simd is experimental/TS).

Matthias Kretz (8):
  libstdc++: Simplify three helper functions into one
  libstdc++: Fix simd build failure on clang
  libstdc++: More efficient masked inc-/decrement implementation
  libstdc++: Add missing constexpr on simd shift implementation
  libstdc++: Always-inline most of non-cmath fixed_size implementation
  libstdc++: Fix formatting
  libstdc++: Fix -Wsign-compare issue
  libstdc++: Test that integral simd reductions are precise

 libstdc++-v3/include/experimental/bits/simd.h | 485 ++--
 .../include/experimental/bits/simd_builtin.h  | 721 +-
 .../include/experimental/bits/simd_detail.h   |   3 +-
 .../experimental/bits/simd_fixed_size.h   | 286 ---
 .../include/experimental/bits/simd_neon.h |  24 +-
 .../include/experimental/bits/simd_ppc.h  |   3 +-
 .../include/experimental/bits/simd_scalar.h   | 362 +
 .../include/experimental/bits/simd_x86.h  | 158 ++--
 .../experimental/simd/tests/reductions.cc |   3 +-
 9 files changed, 1075 insertions(+), 970 deletions(-)

-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──



Re: [PATCH 4/8] libstdc++: Add missing constexpr on simd shift implementation

2023-02-23 Thread Matthias Kretz via Gcc-patches
On Thursday, 23 February 2023 12:07:11 CET Jonathan Wakely wrote:
> On Thu, 23 Feb 2023 at 08:55, Matthias Kretz via Libstdc++
> 
>  wrote:
> > Resolves -Wtautological-compare warnings about `if
> > (__builtin_is_constant_evaluated())` in the implementations of these
> > functions.
> 
> The 'inline' is redundant now, because these are unconditionally
> constexpr which implies inline.

In the simd implementation I always have to make a conscious choice of 
always_inline vs. inline. Having the inline keyword there helps document 
that choice and helps to revisit all not-always_inline functions quickly.

-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──


Re: [PATCH 6/8] libstdc++: Fix formatting

2023-02-24 Thread Matthias Kretz via Gcc-patches
On Friday, 24 February 2023 18:14:53 CET Jonathan Wakely wrote:
> Looks like there are a few remaining spaces that could be removed
> where you've joined lines, e.g.

Fixed and pushed.

> OK for trunk anyway (and the branches if you want).

I'll likely backport it after I have backported all other patches to trunk that 
came before this one.

-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──


[committed] libstdc++: Resolve -Wunused-variable warnings in stdx::simd and tests

2023-05-22 Thread Matthias Kretz via Gcc-patches
pushed to master, will backport later

regtested on x86_64-linux

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_builtin.h (_S_fpclassify): Move
__infn into #ifdef'ed block.
* testsuite/experimental/simd/tests/fpclassify.cc: Declare
constants only when used.
* testsuite/experimental/simd/tests/frexp.cc: Likewise.
* testsuite/experimental/simd/tests/logarithm.cc: Likewise.
* testsuite/experimental/simd/tests/trunc_ceil_floor.cc:
Likewise.
* testsuite/experimental/simd/tests/ldexp_scalbn_scalbln_modf.cc:
Move totest and expect1 into #ifdef'ed block.
---
 libstdc++-v3/include/experimental/bits/simd_builtin.h   | 4 ++--
 .../testsuite/experimental/simd/tests/fpclassify.cc | 2 ++
 libstdc++-v3/testsuite/experimental/simd/tests/frexp.cc | 6 ++
 .../experimental/simd/tests/ldexp_scalbn_scalbln_modf.cc| 4 ++--
 libstdc++-v3/testsuite/experimental/simd/tests/logarithm.cc | 4 +++-
 .../testsuite/experimental/simd/tests/trunc_ceil_floor.cc   | 2 ++
 6 files changed, 17 insertions(+), 5 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index 4c008da26e0..3d52bc6c96a 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -2370,12 +2370,12 @@ _S_fpclassify(_SimdWrapper<_Tp, _Np> __x)
 	constexpr size_t _NI = sizeof(__xn) / sizeof(_I);
 	_GLIBCXX_SIMD_USE_CONSTEXPR auto __minn
 	  = __vector_bitcast<_I>(__vector_broadcast<_NI>(__norm_min_v<_Tp>));
-	_GLIBCXX_SIMD_USE_CONSTEXPR auto __infn
-	  = __vector_bitcast<_I>(__vector_broadcast<_NI>(__infinity_v<_Tp>));
 
 	_GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_normal
 	  = __vector_broadcast<_NI, _I>(FP_NORMAL);
 #if !__FINITE_MATH_ONLY__
+	_GLIBCXX_SIMD_USE_CONSTEXPR auto __infn
+	  = __vector_bitcast<_I>(__vector_broadcast<_NI>(__infinity_v<_Tp>));
 	_GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_nan
 	  = __vector_broadcast<_NI, _I>(FP_NAN);
 	_GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_infinite
diff --git a/libstdc++-v3/testsuite/experimental/simd/tests/fpclassify.cc b/libstdc++-v3/testsuite/experimental/simd/tests/fpclassify.cc
index 00c608f9530..13262df80ac 100644
--- a/libstdc++-v3/testsuite/experimental/simd/tests/fpclassify.cc
+++ b/libstdc++-v3/testsuite/experimental/simd/tests/fpclassify.cc
@@ -38,9 +38,11 @@ test()
   {
 using T = typename V::value_type;
 using intv = std::experimental::fixed_size_simd;
+#if __GCC_IEC_559 >= 2
 constexpr T inf = std::__infinity_v;
 constexpr T denorm_min = std::__infinity_v;
 constexpr T nan = std::__quiet_NaN_v;
+#endif
 constexpr T max = std::__finite_max_v;
 constexpr T norm_min = std::__norm_min_v;
 test_values(
diff --git a/libstdc++-v3/testsuite/experimental/simd/tests/frexp.cc b/libstdc++-v3/testsuite/experimental/simd/tests/frexp.cc
index f6a47cedd13..2c3f500beee 100644
--- a/libstdc++-v3/testsuite/experimental/simd/tests/frexp.cc
+++ b/libstdc++-v3/testsuite/experimental/simd/tests/frexp.cc
@@ -25,11 +25,17 @@ test()
   {
 using int_v = std::experimental::fixed_size_simd;
 using T = typename V::value_type;
+#if __GCC_IEC_559 >= 2 || defined __STDC_IEC_559__
 constexpr auto denorm_min = std::__denorm_min_v;
+#endif
+#if __GCC_IEC_559 >= 2
 constexpr auto norm_min = std::__norm_min_v;
+#endif
 constexpr auto max = std::__finite_max_v;
+#if defined __STDC_IEC_559__
 constexpr auto nan = std::__quiet_NaN_v;
 constexpr auto inf = std::__infinity_v;
+#endif
 test_values(
   {0, 0.25, 0.5, 1, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 32, 31, -0., -0.25, -0.5, -1,
diff --git a/libstdc++-v3/testsuite/experimental/simd/tests/ldexp_scalbn_scalbln_modf.cc b/libstdc++-v3/testsuite/experimental/simd/tests/ldexp_scalbn_scalbln_modf.cc
index 0fb1338fc04..56e275ee4bf 100644
--- a/libstdc++-v3/testsuite/experimental/simd/tests/ldexp_scalbn_scalbln_modf.cc
+++ b/libstdc++-v3/testsuite/experimental/simd/tests/ldexp_scalbn_scalbln_modf.cc
@@ -137,7 +137,6 @@ test()
 	if (modf_is_broken)
 	  return;
 	V integral = {};
-	const V totest = modf(input, &integral);
 	auto&& expected = [&](const auto& v) -> std::pair {
 	  std::pair tmp = {};
 	  using std::modf;
@@ -149,8 +148,9 @@ test()
 	}
 	  return tmp;
 	};
-	const auto expect1 = expected(input);
 #ifdef __STDC_IEC_559__
+	const V totest = modf(input, &integral);
+	const auto expect1 = expected(input);
 	COMPARE(isnan(totest), isnan(expect1.first))
 	  << "modf(" << in

[PATCH] libstdc++: Add missing constexpr to simd

2023-05-22 Thread Matthias Kretz via Gcc-patches
OK for trunk and backporting?

regtested on x86_64-linux and aarch64-linux

The constexpr API is only available with -std=gnu++XX (and proposed for
C++26). The proposal is to have the complete simd API usable in constant
expressions.

This patch resolves several issues with using simd in constant
expressions.

Issues why constant_evaluated branches are necessary:
* subscripting vector builtins is not allowed in constant expressions
* if the implementation needs/uses memcpy
* if the implementation would otherwise call SIMD intrinsics/builtins

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/109261
* include/experimental/bits/simd.h (_SimdWrapper::_M_set):
Avoid vector builtin subscripting in constant expressions.
(resizing_simd_cast): Avoid memcpy if constant_evaluated.
(const_where_expression, where_expression, where)
(__extract_part, simd_mask, _SimdIntOperators, simd): Add either
_GLIBCXX_SIMD_CONSTEXPR (on public APIs), or constexpr (on
internal APIs).
* include/experimental/bits/simd_builtin.h (__vector_permute)
(__vector_shuffle, __extract_part, _GnuTraits::_SimdCastType1)
(_GnuTraits::_SimdCastType2, _SimdImplBuiltin)
(_MaskImplBuiltin::_S_store): Add constexpr.
(_CommonImplBuiltin::_S_store_bool_array)
(_SimdImplBuiltin::_S_load, _SimdImplBuiltin::_S_store)
(_SimdImplBuiltin::_S_reduce, _MaskImplBuiltin::_S_load): Add
constant_evaluated case.
* include/experimental/bits/simd_fixed_size.h
(_S_masked_load): Reword comment.
(__tuple_element_meta, __make_meta, _SimdTuple::_M_apply_r)
(_SimdTuple::_M_subscript_read, _SimdTuple::_M_subscript_write)
(__make_simd_tuple, __optimize_simd_tuple, __extract_part)
(__autocvt_to_simd, _Fixed::__traits::_SimdBase)
(_Fixed::__traits::_SimdCastType, _SimdImplFixedSize): Add
constexpr.
(_SimdTuple::operator[], _M_set): Add constexpr and add
constant_evaluated case.
(_MaskImplFixedSize::_S_load): Add constant_evaluated case.
* include/experimental/bits/simd_scalar.h: Add constexpr.

* include/experimental/bits/simd_x86.h (_CommonImplX86): Add
constexpr and add constant_evaluated case.
(_SimdImplX86::_S_equal_to, _S_not_equal_to, _S_less)
(_S_less_equal): Value-initialize to satisfy constexpr
evaluation.
(_MaskImplX86::_S_load): Add constant_evaluated case.
(_MaskImplX86::_S_store): Add constexpr and constant_evaluated
case. Value-initialize local variables.
(_MaskImplX86::_S_logical_and, _S_logical_or, _S_bit_not)
(_S_bit_and, _S_bit_or, _S_bit_xor): Add constant_evaluated
case.
* testsuite/experimental/simd/pr109261_constexpr_simd.cc: New
test.
---
 libstdc++-v3/include/experimental/bits/simd.h | 153 ---
 .../include/experimental/bits/simd_builtin.h  | 100 ++
 .../experimental/bits/simd_fixed_size.h   | 177 +-
 .../include/experimental/bits/simd_scalar.h   |  78 
 .../include/experimental/bits/simd_x86.h  |  68 +--
 .../simd/pr109261_constexpr_simd.cc   | 109 +++
 6 files changed, 437 insertions(+), 248 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/experimental/simd/
pr109261_constexpr_simd.cc


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index 224153ffbaf..b0571ca26c4 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -2675,7 +2675,14 @@ _SimdWrapper(_V __x)
 
 _GLIBCXX_SIMD_INTRINSIC constexpr void
 _M_set(size_t __i, _Tp __x)
-{ _M_data[__i] = __x; }
+{
+  if (__builtin_is_constant_evaluated())
+	_M_data = __generate_from_n_evaluations<_Width, _BuiltinType>([&](auto __j) {
+		return __j == __i ? __x : _M_data[__j()];
+		  });
+  else
+	_M_data[__i] = __x;
+}
 
 _GLIBCXX_SIMD_INTRINSIC
 constexpr bool
@@ -3186,6 +3193,10 @@ resizing_simd_cast(const simd<_Up, _Ap>& __x)
   {
 if constexpr (is_same_v)
   return __x;
+else if (__builtin_is_constant_evaluated())
+  return _Tp([&](auto __i) constexpr {
+	   return __i < simd_size_v<_Up, _Ap> ? __x[__i] : _Up();
+	 });
 else if constexpr (simd_size_v<_Up, _Ap> == 1)
   {
 	_Tp __r{};
@@ -3321,10 +3332,11 @@ __get_lvalue(const const_where_expression& __x)
 
 const_where_expression& operator=(const const_where_expression&) = delete;
 
-_GLIBCXX_SIMD_INTRINSIC const_where_expression(const _M& 

Re: [PATCH] libstdc++: Add missing constexpr to simd

2023-05-22 Thread Matthias Kretz via Gcc-patches
On Monday, 22 May 2023 18:25:15 CEST Jonathan Wakely wrote:
> I note that using if (not __builtin_is_constant_evaluated()) will fail if
> compiled with -fno-operator-names, which is why we don't use 'not', 'and',
> etc. elsewhere in libstdc++. I don't know if (or why) anybody uses that
> option though, so I don't think you need to change anything in stdx::simd.

Ah, I just recently convinced myself that "operator-names" are more readable 
(=> easier to maintain). But OTOH a mix isn't necessarily better. I'm fine 
with keeping it consistent.

> > * subscripting vector builtins is not allowed in constant expressions
> 
> Is that just because nobody made it work (yet)?

That is a good question. I guess I should open a PR.

> * if the implementation needs/uses memcpy
> 
> > * if the implementation would otherwise call SIMD intrinsics/builtins
> 
> The indentation looks off here and in the _M_set member function following
> it:

Yes. I had to put an #if between an else and an if. Looks like this:

  else
#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
if (not __builtin_is_constant_evaluated())
return reinterpret_cast*>(this)[__i];
  else
#endif
if constexpr (__is_scalar_abi<_Abi0>())

Should the `if` be aligned to the `else` instead?

> Are the copyright years on
> testsuite/experimental/simd/pr109261_constexpr_simd.cc correct, or just
> copy&paste?

Right, copy&paste. Should I simply remove the complete header?

- Matthias
-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──


[committed] Re: [PATCH] libstdc++: Add missing constexpr to simd

2023-05-23 Thread Matthias Kretz via Gcc-patches
I pushed the attached patch.

I kept the operator names... too late, there were already operator names in 
the stdx::simd implementation anyway. ;)

- Matthias

On Monday, 22 May 2023 22:51:49 CEST Jonathan Wakely wrote:
> On Mon, 22 May 2023 at 21:27, Matthias Kretz  wrote:
> > On Monday, 22 May 2023 18:25:15 CEST Jonathan Wakely wrote:
> > > I note that using if (not __builtin_is_constant_evaluated()) will fail if
> > > compiled with -fno-operator-names, which is why we don't use 'not',
> > 
> > 'and',
> > 
> > > etc. elsewhere in libstdc++. I don't know if (or why) anybody uses that
> > > option though, so I don't think you need to change anything in
> > > stdx::simd.
> > 
> > Ah, I just recently convinced myself that "operator-names" are more
> > readable
> > (=> easier to maintain).
> 
> I tend to agree, but every time I decide to start using them some testcases
> start to fail and I remember why we don't use them :-(
> 
> > But OTOH a mix isn't necessarily better. I'm fine
> > with keeping it consistent.
> > 
> > > > * subscripting vector builtins is not allowed in constant expressions
> > > 
> > > Is that just because nobody made it work (yet)?
> > 
> > That is a good question. I guess I should open a PR.
> > 
> > > * if the implementation needs/uses memcpy
> > > 
> > > > * if the implementation would otherwise call SIMD intrinsics/builtins
> > > 
> > > The indentation looks off here and in the _M_set member function
> > 
> > following
> > 
> > > it:
> > Yes. I had to put an #if between an else and an if. Looks like this:
> >   else
> > 
> > #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
> > 
> > if (not __builtin_is_constant_evaluated())
> > return reinterpret_cast*>(this)[__i];
> >   
> >   else
> > 
> > #endif
> > 
> > if constexpr (__is_scalar_abi<_Abi0>())
> 
> Ah yes, so the if is indented two spaces from the else above it.
> What looks wrong to me is that the return is at the same indentation as
> the if controlling it.
> 
> > Should the `if` be aligned to the `else` instead?
> 
> How about moving the two else tokens?
> 
>  #ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
>else if (not __builtin_is_constant_evaluated())
>  return reinterpret_cast*>(this)[__i];
>  #endif
>else if constexpr (__is_scalar_abi<_Abi0>())
> 
> I think that avoids the issue.
> 
> > > Are the copyright years on
> > > testsuite/experimental/simd/pr109261_constexpr_simd.cc correct, or just
> > > copy&paste?
> > 
> > Right, copy&paste. Should I simply remove the complete header?
> 
> You could do. I don't think there's much in that test that's novel or worth
> asserting copyright over - but if you disagree and want to assign whatever
> is copyrightable to the FSF, keep the header but fix the years. Either way
> is fine by me.
> 
> OK for trunk and backports, with the comments above suitably resolved.


-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──
diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index 224153ffbaf..b0571ca26c4 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -2675,7 +2675,14 @@ _SimdWrapper(_V __x)
 
 _GLIBCXX_SIMD_INTRINSIC constexpr void
 _M_set(size_t __i, _Tp __x)
-{ _M_data[__i] = __x; }
+{
+  if (__builtin_is_constant_evaluated())
+	_M_data = __generate_from_n_evaluations<_Width, _BuiltinType>([&](auto __j) {
+		return __j == __i ? __x : _M_data[__j()];
+		  });
+  else
+	_M_data[__i] = __x;
+}
 
 _GLIBCXX_SIMD_INTRINSIC
 constexpr bool
@@ -3186,6 +3193,10 @@ resizing_simd_cast(const simd<_Up, _Ap>& __x)
   {
 if constexpr (is_same_v)
   return __x;
+else if (__builtin_is_constant_evaluated())
+  return _Tp([&](auto __i) constexpr {
+	   return __i < simd_size_v<_Up, _Ap> ? __x[__i] : _Up();
+	 });
 else if constexpr (simd_size_v<_Up, _Ap> == 1)
   {
 	_Tp __r{};
@@ -3321,10 +3332,11 @@ __get_lvalue(const const_where_expression& __x)
 
 const_where_expression& operator=(const const_where_expression&) = delete;
 
-_GLIBCXX_SIMD_INTRINSIC const_where_expression(const _M& __kk, const _Tp& dd)
-  : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}
+_GLIBCXX_SIMD_INTRINSIC constexpr
+const_where_expression(const _M& __kk, const _Tp& dd)
+: _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}
 
-_GLIBCXX_SIMD_INTRINSIC _V
+_GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
 operator-() const&&
 {
   return {__private_init,
@@ -,7 +3345,7 @@ __get_lvalue(const const_where_expression& __x)
 }
 
 template 
-  [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _V
+  [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIB

[PATCH] libstdc++: Add missing constexpr to simd_neon

2023-05-23 Thread Matthias Kretz via Gcc-patches

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/109261
* include/experimental/bits/simd_neon.h (_S_reduce): Add
constexpr and make NEON implementation conditional on
not __builtin_is_constant_evaluated.
---
 .../include/experimental/bits/simd_neon.h | 76 +--
 1 file changed, 36 insertions(+), 40 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd_neon.h b/libstdc++-v3/include/experimental/bits/simd_neon.h
index 637b121b130..8f732d7587b 100644
--- a/libstdc++-v3/include/experimental/bits/simd_neon.h
+++ b/libstdc++-v3/include/experimental/bits/simd_neon.h
@@ -84,50 +84,46 @@ _S_masked_store_nocvt(_SimdWrapper<_Tp, _Np> __v, _Tp* __mem,
 // }}}
 // _S_reduce {{{
 template 
-  _GLIBCXX_SIMD_INTRINSIC static _Tp
+  _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
   _S_reduce(simd<_Tp, _Abi> __x, _BinaryOperation&& __binary_op)
   {
-	constexpr size_t _Np = __x.size();
-	if constexpr (sizeof(__x) == 16 && _Np >= 4
-		  && !_Abi::template _S_is_partial<_Tp>)
-	  {
-	const auto __halves = split>>(__x);
-	const auto __y = __binary_op(__halves[0], __halves[1]);
-	return _SimdImplNeon>::_S_reduce(
-	  __y, static_cast<_BinaryOperation&&>(__binary_op));
-	  }
-	else if constexpr (_Np == 8)
-	  {
-	__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
- __vector_permute<1, 0, 3, 2, 5, 4, 7, 6>(
-   __x._M_data)));
-	__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
- __vector_permute<3, 2, 1, 0, 7, 6, 5, 4>(
-   __x._M_data)));
-	__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
- __vector_permute<7, 6, 5, 4, 3, 2, 1, 0>(
-   __x._M_data)));
-	return __x[0];
-	  }
-	else if constexpr (_Np == 4)
-	  {
-	__x
-	  = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
-   __vector_permute<1, 0, 3, 2>(__x._M_data)));
-	__x
-	  = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
-   __vector_permute<3, 2, 1, 0>(__x._M_data)));
-	return __x[0];
-	  }
-	else if constexpr (_Np == 2)
+	if (not __builtin_is_constant_evaluated())
 	  {
-	__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
- __vector_permute<1, 0>(__x._M_data)));
-	return __x[0];
+	constexpr size_t _Np = __x.size();
+	if constexpr (sizeof(__x) == 16 && _Np >= 4
+			&& !_Abi::template _S_is_partial<_Tp>)
+	  {
+		const auto __halves = split>>(__x);
+		const auto __y = __binary_op(__halves[0], __halves[1]);
+		return _SimdImplNeon>::_S_reduce(
+			 __y, static_cast<_BinaryOperation&&>(__binary_op));
+	  }
+	else if constexpr (_Np == 8)
+	  {
+		__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
+	 __vector_permute<1, 0, 3, 2, 5, 4, 7, 6>(__x._M_data)));
+		__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
+	 __vector_permute<3, 2, 1, 0, 7, 6, 5, 4>(__x._M_data)));
+		__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
+	 __vector_permute<7, 6, 5, 4, 3, 2, 1, 0>(__x._M_data)));
+		return __x[0];
+	  }
+	else if constexpr (_Np == 4)
+	  {
+		__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
+	 __vector_permute<1, 0, 3, 2>(__x._M_data)));
+		__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
+	 __vector_permute<3, 2, 1, 0>(__x._M_data)));
+		return __x[0];
+	  }
+	else if constexpr (_Np == 2)
+	  {
+		__x = __binary_op(__x, _Base::template _M_make_simd<_Tp, _Np>(
+	 __vector_permute<1, 0>(__x._M_data)));
+		return __x[0];
+	  }
 	  }
-	else
-	  return _Base::_S_reduce(__x,
-  static_cast<_BinaryOperation&&>(__binary_op));
+	return _Base::_S_reduce(__x, static_cast<_BinaryOperation&&>(__binary_op));
   }
 
 // }}}


[PATCH] libstdc++: Fix SFINAE for __is_intrinsic_type on ARM

2023-05-24 Thread Matthias Kretz via Gcc-patches
OK for master and all branches? (this issue only surfaced because of the new 
test)

 8< -

On ARM, NEON doesn't support double, so __is_intrinsic_type_v should say false (instead of being ill-formed).

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/109261
* include/experimental/bits/simd.h (__intrinsic_type):
Specialize __intrinsic_type<double, 8> and
__intrinsic_type<double, 16> in any case, but provide the member
type only with __aarch64__.
---
 libstdc++-v3/include/experimental/bits/simd.h | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index b0571ca26c4..d1f388310f9 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -2369,15 +2369,21 @@ struct __intrinsic_type
   struct __intrinsic_type
   { using type = float32x4_t; };
 
-#if _GLIBCXX_SIMD_HAVE_NEON_A64
 template <>
   struct __intrinsic_type
-  { using type = float64x1_t; };
+  {
+#if _GLIBCXX_SIMD_HAVE_NEON_A64
+   using type = float64x1_t;
+#endif
+  };
 
 template <>
   struct __intrinsic_type
-  { using type = float64x2_t; };
+  {
+#if _GLIBCXX_SIMD_HAVE_NEON_A64
+using type = float64x2_t;
 #endif
+  };
 
 #define _GLIBCXX_SIMD_ARM_INTRIN(_Bits, _Np)   \
 template <>\


[PATCH] libstdc++: Fix type of first argument to vec_cntm call

2023-05-24 Thread Matthias Kretz via Gcc-patches
OK for master and backports? (also a long-standing bug that didn't surface 
until the new constexpr test was added)

tested on powerpc64le-linux-gnu

- 8< -

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/109949
* include/experimental/bits/simd.h (__intrinsic_type): If
__ALTIVEC__ is defined, map gnu::vector_size types to their
corresponding __vector T types without losing unsignedness of
integer types. Also prefer long long over long.
* include/experimental/bits/simd_ppc.h (_S_popcount): Cast mask
object to the expected unsigned vector type.
---
 libstdc++-v3/include/experimental/bits/simd.h | 39 ---
 .../include/experimental/bits/simd_ppc.h  |  3 +-
 2 files changed, 36 insertions(+), 6 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index d1f388310f9..26f08f83ab0 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -2466,11 +2466,40 @@ struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _By
 		  "no __intrinsic_type support for 64-bit floating point on PowerPC w/o VSX");
 #endif
 
-using type =
-  typename __intrinsic_type_impl<
-		 conditional_t,
-			   conditional_t<_S_is_ldouble, double, _Tp>,
-			   __int_for_sizeof_t<_Tp>>>::type;
+static constexpr auto __element_type()
+{
+  if constexpr (is_floating_point_v<_Tp>)
+	{
+	  if constexpr (_S_is_ldouble)
+	return double {};
+	  else
+	return _Tp {};
+	}
+  else if constexpr (is_signed_v<_Tp>)
+	{
+	  if constexpr (sizeof(_Tp) == sizeof(_SChar))
+	return _SChar {};
+	  else if constexpr (sizeof(_Tp) == sizeof(short))
+	return short {};
+	  else if constexpr (sizeof(_Tp) == sizeof(int))
+	return int {};
+	  else if constexpr (sizeof(_Tp) == sizeof(_LLong))
+	return _LLong {};
+	}
+  else
+	{
+	  if constexpr (sizeof(_Tp) == sizeof(_UChar))
+	return _UChar {};
+	  else if constexpr (sizeof(_Tp) == sizeof(_UShort))
+	return _UShort {};
+	  else if constexpr (sizeof(_Tp) == sizeof(_UInt))
+	return _UInt {};
+	  else if constexpr (sizeof(_Tp) == sizeof(_ULLong))
+	return _ULLong {};
+	}
+}
+
+using type = typename __intrinsic_type_impl::type;
   };
 #endif // __ALTIVEC__
 
diff --git a/libstdc++-v3/include/experimental/bits/simd_ppc.h b/libstdc++-v3/include/experimental/bits/simd_ppc.h
index eca1b34241b..2ea7234bd99 100644
--- a/libstdc++-v3/include/experimental/bits/simd_ppc.h
+++ b/libstdc++-v3/include/experimental/bits/simd_ppc.h
@@ -130,7 +130,8 @@ _S_popcount(simd_mask<_Tp, _Abi> __k)
 	const auto __kv = __as_vector(__k);
 	if constexpr (__have_power10vec)
 	  {
-	return vec_cntm(__to_intrin(__kv), 1);
+	using _Intrin = __intrinsic_type16_t>>;
+	return vec_cntm(reinterpret_cast<_Intrin>(__kv), 1);
 	  }
 	else if constexpr (sizeof(_Tp) >= sizeof(int))
 	  {


Re: [PATCH] libstdc++: Implement std::unreachable() for C++23 (P0627R6)

2022-03-31 Thread Matthias Kretz via Gcc-patches
I like it. But I'd like it even more if we could have

#elif defined _UBSAN
__ubsan_invoke_ub("reached std::unreachable()");

But to my knowledge UBSAN has no hooks for the library like this (yet).

and...

On Thursday, 31 March 2022 17:30:29 CEST Jonathan Wakely via Gcc-patches 
wrote:
> diff --git a/libstdc++-v3/include/std/utility
> b/libstdc++-v3/include/std/utility index 0d7f8954c5a..e5b5212381d 100644
> --- a/libstdc++-v3/include/std/utility
> +++ b/libstdc++-v3/include/std/utility
> @@ -186,6 +186,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>  constexpr underlying_type_t<_Tp>
>  to_underlying(_Tp __value) noexcept
>  { return static_cast>(__value); }
> +
> +#define __cpp_lib_unreachable 202202L
> +  [[noreturn,__gnu__::__always_inline__]]
> +  void
> +  unreachable()
> +  {
> +#ifdef _GLIBCXX_DEBUG
> +std::__glibcxx_assert_fail("", 0, "std::unreachable()",
> +  "inconceivable!");

Funny message, but it should be more helpful, IMHO. :)

-Matthias

> +#elif defined _GLIBCXX_ASSERTIONS
> +__builtin_trap();
> +#else
> +__builtin_unreachable();
> +#endif
> +  }


-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──


Re: [PATCH] libstdc++: Implement std::unreachable() for C++23 (P0627R6)

2022-04-01 Thread Matthias Kretz via Gcc-patches
On Friday, 1 April 2022 13:33:42 CEST Jonathan Wakely wrote:
> Matthias didn't like my Princess Bride easter egg :-)
> Would the attached be better?

LGTM.

-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──


Re: [PATCH] libstdc++: ppc: conditionalize vsx-only simd intrinsics

2022-04-28 Thread Matthias Kretz via Gcc-patches
On Thursday, 28 April 2022 08:09:54 CEST Alexandre Oliva via Gcc-patches 
wrote:
> libstdc++'s bits/simd.h section for PPC (Altivec) defines various
> intrinsic vector types that are only available along with VSX: 64-bit
> long double, double, (un)signed long long, and 64-bit (un)signed long.

Oh, so uttering `__vector double` is ill-formed (now) without VSX? I'm fairly 
certain I tested without VSX and the __intrinsic_type_impl definitions were 
fine.

> experimental/simd/standard_abi_usable{,_2}.cc tests error out reporting
> the unmet requirements when the target cpu doesn't enable VSX.  Make the
> reported intrinsic types conditional on VSX so that <experimental/simd>
> can be used on ppc variants that do not have VSX support.

IIRC this will break other valid uses. You'd have to run `make check-simd` 
(see libstdc++-v3/testsuite/experimental/simd/README.md) to be certain nothing 
breaks. I will also take a look.

> Regstrapped on powerpc64el-linux-gnu.  Ok to install?
> 
> This is also relevant for gcc-11.  Tested with
> x86_64-linux-gnu-x-ppc64-vx7r2.  Ok for gcc-11?
> 
> 
> for  libstdc++-v3/ChangeLog
> 
>   * include/experimental/bits/simd.h [__ALTIVEC__]: Require VSX
>   for double, long long, and 64-bit long and 64-bit long double
>   intrinsic types.
> ---
>  libstdc++-v3/include/experimental/bits/simd.h |   11 +--
>  1 file changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/libstdc++-v3/include/experimental/bits/simd.h
> b/libstdc++-v3/include/experimental/bits/simd.h index
> 82e9841195e1d..66c07127ec435 100644
> --- a/libstdc++-v3/include/experimental/bits/simd.h
> +++ b/libstdc++-v3/include/experimental/bits/simd.h
> @@ -2430,17 +2430,23 @@ template 
>template <>  
>\ struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; }
> _GLIBCXX_SIMD_PPC_INTRIN(float);
> +# ifdef __VSX__
>  _GLIBCXX_SIMD_PPC_INTRIN(double);
> +# endif
>  _GLIBCXX_SIMD_PPC_INTRIN(signed char);
>  _GLIBCXX_SIMD_PPC_INTRIN(unsigned char);
>  _GLIBCXX_SIMD_PPC_INTRIN(signed short);
>  _GLIBCXX_SIMD_PPC_INTRIN(unsigned short);
>  _GLIBCXX_SIMD_PPC_INTRIN(signed int);
>  _GLIBCXX_SIMD_PPC_INTRIN(unsigned int);
> +# if defined __VSX__ || __LONG_WIDTH__ == 32
>  _GLIBCXX_SIMD_PPC_INTRIN(signed long);
>  _GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
> +# endif
> +# ifdef __VSX__
>  _GLIBCXX_SIMD_PPC_INTRIN(signed long long);
>  _GLIBCXX_SIMD_PPC_INTRIN(unsigned long long);
> +# endif
>  #undef _GLIBCXX_SIMD_PPC_INTRIN
> 
>  template 
> @@ -2452,8 +2458,9 @@ template 
>  static_assert(!(_S_is_ldouble && sizeof(long double) > sizeof(double)),
> "no __intrinsic_type support for long double on PPC");
>  #ifndef __VSX__
> -static_assert(!is_same_v<_Tp, double>,
> -   "no __intrinsic_type support for double on PPC w/o VSX");
> +static_assert(!(is_same_v<_Tp, double>
> + || (_S_is_ldouble && sizeof(long double) == 
sizeof(double))),
> +   "no __intrinsic_type support for [long] double on PPC w/o 
VSX");

The missing condition here was an incorrect omission. With -mlong-double-64 
and without VSX no assertion caught the issue.

IIRC, a user won't get to see this error message unless there's a bug in the 
simd library implementation, so the error message is good enough for me. (It's 
talking about __intrinsic_type, the user would be lost in any case.)

>  #endif
>  using type =
>typename __intrinsic_type_impl<


-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──





Re: [PATCH] libstdc++: ppc: conditionalize vsx-only simd intrinsics

2022-04-29 Thread Matthias Kretz via Gcc-patches
On Friday, 29 April 2022 03:53:40 CEST Alexandre Oliva via Gcc-patches wrote:
> Thanks, awaiting feedback on the suggestion above to post the consolidated
> patch.

LGTM. I think this improves clarity for the __intrinsic_type static assertions 
significantly.

And don't bother with my other mail. If `__vector double` without VSX was 
always ill-formed then I must be misremembering something.

Cheers,
  Matthias

-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──


[PATCH 0/2] Make std::experimental::simd (more) usable with Clang

2023-03-21 Thread Matthias Kretz via Gcc-patches
Up to now, compiling std::experimental::simd with Clang would lead to wrong 
code, not compiling, or ICEs. After these patches I hope it's only ICEs.

Tested on x86_64-pc-linux-gnu.

Matthias Kretz (2):
  libstdc++: Fix simd test compilation with Clang
  libstdc++: Fix simd compilation with Clang

 .../include/experimental/bits/simd_detail.h   |  2 +-
 .../include/experimental/bits/simd_x86.h  | 59 +--
 .../experimental/simd/tests/operators.cc  |  9 ++-
 3 files changed, 64 insertions(+), 6 deletions(-)

-- 
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──



[PATCH 1/2] libstdc++: Fix simd test compilation with Clang

2023-03-21 Thread Matthias Kretz via Gcc-patches


Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* testsuite/experimental/simd/tests/operators.cc: Clang doesn't
define __GCC_IEC_559. Use __STDC_IEC_559__ instead.
---
 .../testsuite/experimental/simd/tests/operators.cc   | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/testsuite/experimental/simd/tests/operators.cc b/libstdc++-v3/testsuite/experimental/simd/tests/operators.cc
index c4d91fa5d2b..72af7061c73 100644
--- a/libstdc++-v3/testsuite/experimental/simd/tests/operators.cc
+++ b/libstdc++-v3/testsuite/experimental/simd/tests/operators.cc
@@ -211,7 +211,14 @@ test()
 }
 
 // divides
-constexpr bool is_iec559 = __GCC_IEC_559 >= 2;
+constexpr bool is_iec559 =
+#ifdef __GCC_IEC_559
+  __GCC_IEC_559 >= 2;
+#elif defined __STDC_IEC_559__
+  true;
+#else
+  false;
+#endif
 if constexpr (std::is_floating_point_v && !is_iec559)
   { // avoid testing subnormals and expect minor deltas for non-IEC559 float
 	V x = 2;


[PATCH 2/2] libstdc++: Fix simd compilation with Clang

2023-03-21 Thread Matthias Kretz via Gcc-patches


Clang fails to compile some constant expressions involving simd.
Therefore, just disable this non-conforming extension for clang.

Fix AVX512 blend implementation for Clang. It was converting the bitmask
to bool before, which is obviously wrong. Instead use a Clang builtin to
convert the bitmask to vector-mask before using a vector blend ?:. A
similar change is required for the masked unary implementation, because
the GCC builtins do not exist on Clang.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_detail.h: Don't declare the
simd API as constexpr with Clang.
* include/experimental/bits/simd_x86.h (__movm): New.
(_S_blend_avx512): Resolve FIXME. Implement blend using __movm
and ?:.
(_SimdImplX86::_S_masked_unary): Clang does not implement the
same builtins. Implement the function using __movm, ?:, and -
operators on vector_size types instead.
---
 .../include/experimental/bits/simd_detail.h   |  2 +-
 .../include/experimental/bits/simd_x86.h  | 59 +--
 2 files changed, 56 insertions(+), 5 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd_detail.h b/libstdc++-v3/include/experimental/bits/simd_detail.h
index 30cc1ef0eef..f3745bf3e4c 100644
--- a/libstdc++-v3/include/experimental/bits/simd_detail.h
+++ b/libstdc++-v3/include/experimental/bits/simd_detail.h
@@ -267,7 +267,7 @@ namespace experimental
 #define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0)
 #define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1)
 
-#if defined __STRICT_ANSI__ && __STRICT_ANSI__
+#if (defined __STRICT_ANSI__ && __STRICT_ANSI__) || defined __clang__
 #define _GLIBCXX_SIMD_CONSTEXPR
 #define _GLIBCXX_SIMD_USE_CONSTEXPR_API const
 #else
diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 608918542c6..165738c4e2c 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -363,6 +363,53 @@ __maskload_pd(const double* __ptr, _Tp __k)
 
 // }}}
 
+#ifdef __clang__
+template 
+  _GLIBCXX_SIMD_INTRINSIC constexpr auto
+  __movm(_Kp __k) noexcept
+  {
+static_assert(is_unsigned_v<_Kp>);
+if constexpr (sizeof(_Tp) == 1 && __have_avx512bw)
+  {
+	if constexpr (_Np <= 16 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2b128(__k);
+	else if constexpr (_Np <= 32 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2b256(__k);
+	else
+	  return __builtin_ia32_cvtmask2b512(__k);
+  }
+else if constexpr (sizeof(_Tp) == 2 && __have_avx512bw)
+  {
+	if constexpr (_Np <= 8 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2w128(__k);
+	else if constexpr (_Np <= 16 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2w256(__k);
+	else
+	  return __builtin_ia32_cvtmask2w512(__k);
+  }
+else if constexpr (sizeof(_Tp) == 4 && __have_avx512dq)
+  {
+	if constexpr (_Np <= 4 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2d128(__k);
+	else if constexpr (_Np <= 8 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2d256(__k);
+	else
+	  return __builtin_ia32_cvtmask2d512(__k);
+  }
+else if constexpr (sizeof(_Tp) == 8 && __have_avx512dq)
+  {
+	if constexpr (_Np <= 2 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2q128(__k);
+	else if constexpr (_Np <= 4 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2q256(__k);
+	else
+	  return __builtin_ia32_cvtmask2q512(__k);
+  }
+else
+  __assert_unreachable<_Tp>();
+  }
+#endif // __clang__
+
 #ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
 #include "simd_x86_conversions.h"
 #endif
@@ -619,14 +666,13 @@ _pdep_u32(
 _GLIBCXX_SIMD_INTRINSIC static _TV
 _S_blend_avx512(const _Kp __k, const _TV __a, const _TV __b) noexcept
 {
-#ifdef __clang__
-  // FIXME: this does a boolean choice, not a blend
-  return __k ? __a : __b;
-#else
   static_assert(__is_vector_type_v<_TV>);
   using _Tp = typename _VectorTraits<_TV>::value_type;
   static_assert(sizeof(_TV) >= 16);
   static_assert(sizeof(_Tp) <= 8);
+#ifdef __clang__
+  return __movm<_VectorTraits<_TV>::_S_full_size, _Tp>(__k) ? __b : __a;
+#else
   using _IntT
 	= conditional_t<(sizeof(_Tp) > 2),
 			conditional_t,
@@ -3483,6 +3529,10 @@ _S_masked_unary(const _SimdWrapper<_K, _Np> __k, const _SimdWrapper<_Tp, _Np> __
 	// optimize masked unary increment and decrement as masked sub +/-1
 	constexpr int __pm_one
 	  = is_same_v<_Op, __increment> ? -1 : 1;
+#ifdef __clang__
+	return __vector_bitcast<_Tp, _Np>(__movm<_Np, _Tp>(__k._M_data

[committed] libstdc++: Fix simd compilation with Clang

2023-03-21 Thread Matthias Kretz via Gcc-patches
Slightly modified patch. I had to fix floating-point AVX512 blending on 
Clang by removing a cast. While at it I cleaned up the -Wundef noise.

- 8< --

Clang fails to compile some constant expressions involving simd.
Therefore, just disable this non-conforming extension for clang.

Fix AVX512 blend implementation for Clang. It was converting the bitmask
to bool before, which is obviously wrong. Instead use a Clang builtin to
convert the bitmask to vector-mask before using a vector blend ?:. A
similar change is required for the masked unary implementation, because
the GCC builtins do not exist on Clang.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_detail.h: Don't declare the
simd API as constexpr with Clang.
* include/experimental/bits/simd_x86.h (__movm): New.
(_S_blend_avx512): Resolve FIXME. Implement blend using __movm
and ?:.
(_SimdImplX86::_S_masked_unary): Clang does not implement the
same builtins. Implement the function using __movm, ?:, and -
operators on vector_size types instead.
---
 .../include/experimental/bits/simd_detail.h   |  2 +-
 .../include/experimental/bits/simd_x86.h  | 58 +--
 2 files changed, 55 insertions(+), 5 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd_detail.h b/libstdc++-v3/include/experimental/bits/simd_detail.h
index 30cc1ef0eef..49b94decf0a 100644
--- a/libstdc++-v3/include/experimental/bits/simd_detail.h
+++ b/libstdc++-v3/include/experimental/bits/simd_detail.h
@@ -267,7 +267,7 @@ namespace experimental
 #define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0)
 #define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1)
 
-#if defined __STRICT_ANSI__ && __STRICT_ANSI__
+#if __STRICT_ANSI__ || defined __clang__
 #define _GLIBCXX_SIMD_CONSTEXPR
 #define _GLIBCXX_SIMD_USE_CONSTEXPR_API const
 #else
diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 608918542c6..7b8f1c664b3 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -363,6 +363,53 @@ __maskload_pd(const double* __ptr, _Tp __k)
 
 // }}}
 
+#ifdef __clang__
+template 
+  _GLIBCXX_SIMD_INTRINSIC constexpr auto
+  __movm(_Kp __k) noexcept
+  {
+static_assert(is_unsigned_v<_Kp>);
+if constexpr (sizeof(_Tp) == 1 && __have_avx512bw)
+  {
+	if constexpr (_Np <= 16 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2b128(__k);
+	else if constexpr (_Np <= 32 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2b256(__k);
+	else
+	  return __builtin_ia32_cvtmask2b512(__k);
+  }
+else if constexpr (sizeof(_Tp) == 2 && __have_avx512bw)
+  {
+	if constexpr (_Np <= 8 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2w128(__k);
+	else if constexpr (_Np <= 16 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2w256(__k);
+	else
+	  return __builtin_ia32_cvtmask2w512(__k);
+  }
+else if constexpr (sizeof(_Tp) == 4 && __have_avx512dq)
+  {
+	if constexpr (_Np <= 4 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2d128(__k);
+	else if constexpr (_Np <= 8 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2d256(__k);
+	else
+	  return __builtin_ia32_cvtmask2d512(__k);
+  }
+else if constexpr (sizeof(_Tp) == 8 && __have_avx512dq)
+  {
+	if constexpr (_Np <= 2 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2q128(__k);
+	else if constexpr (_Np <= 4 && __have_avx512vl)
+	  return __builtin_ia32_cvtmask2q256(__k);
+	else
+	  return __builtin_ia32_cvtmask2q512(__k);
+  }
+else
+  __assert_unreachable<_Tp>();
+  }
+#endif // __clang__
+
 #ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
 #include "simd_x86_conversions.h"
 #endif
@@ -619,14 +666,13 @@ _pdep_u32(
 _GLIBCXX_SIMD_INTRINSIC static _TV
 _S_blend_avx512(const _Kp __k, const _TV __a, const _TV __b) noexcept
 {
-#ifdef __clang__
-  // FIXME: this does a boolean choice, not a blend
-  return __k ? __a : __b;
-#else
   static_assert(__is_vector_type_v<_TV>);
   using _Tp = typename _VectorTraits<_TV>::value_type;
   static_assert(sizeof(_TV) >= 16);
   static_assert(sizeof(_Tp) <= 8);
+#ifdef __clang__
+  return __movm<_VectorTraits<_TV>::_S_full_size, _Tp>(__k) ? __b : __a;
+#else
   using _IntT
 	= conditional_t<(sizeof(_Tp) > 2),
 			conditional_t,
@@ -3483,6 +3529,9 @@ _S_masked_unary(const _SimdWrapper<_K, _Np> __k, const _SimdWrapper<_Tp, _Np> __
 	// optimize masked unary increment and decrement as masked sub +/-1
 	constexpr int __pm_one
 	   

[PATCH] libstdc++: Skip integer division optimization for Clang

2023-03-21 Thread Matthias Kretz via Gcc-patches
Tested on x86_64-pc-linux-gnu.

- 8< ---

Clang ICEs on _SimdImplX86::_S_divides. The function is only working
around a missed optimization and not necessary for correctness.
Therefore, don't use it for Clang.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_detail.h: Don't define
_GLIBCXX_SIMD_WORKAROUND_PR90993 for Clang.
* include/experimental/bits/simd_x86.h (_S_divides): Remove
check for __clang__.
---
 libstdc++-v3/include/experimental/bits/simd_detail.h | 2 ++
 libstdc++-v3/include/experimental/bits/simd_x86.h| 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd_detail.h b/libstdc++-v3/include/experimental/bits/simd_detail.h
index 49b94decf0a..1fb77866bb2 100644
--- a/libstdc++-v3/include/experimental/bits/simd_detail.h
+++ b/libstdc++-v3/include/experimental/bits/simd_detail.h
@@ -320,7 +320,9 @@ namespace experimental
 #endif
 
 // integer division not optimized
+#ifndef __clang__
 #define _GLIBCXX_SIMD_WORKAROUND_PR90993 1
+#endif
 
 // very bad codegen for extraction and concatenation of 128/256 "subregisters"
 // with sizeof(element type) < 8: https://godbolt.org/g/mqUsgM
diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 7b8f1c664b3..28ba344c2b2 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -1469,7 +1469,7 @@ _CsrGuard()
 		[&__xf, &__yf](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
 		  -> _SimdWrapper<_Float, __n_intermediate>
 		{
-#if !defined __clang__ && __GCC_IEC_559 == 0
+#if __GCC_IEC_559 == 0
 		  // If -freciprocal-math is active, using the `/` operator is
 		  // incorrect because it may be translated to an imprecise
 		  // multiplication with reciprocal. We need to use inline
@@ -1524,7 +1524,7 @@ _CsrGuard()
 	  */
 	return _Base::_S_divides(__x, __y);
   }
-  #endif // _GLIBCXX_SIMD_WORKAROUND_PR90993
+#endif // _GLIBCXX_SIMD_WORKAROUND_PR90993
 
 // }}}
 // _S_modulus {{{


[PATCH] libstdc++: Use more precise __RECIPROCAL_MATH__ macro

2023-03-21 Thread Matthias Kretz via Gcc-patches
Tested on x86_64-pc-linux-gnu.

- 8< ---

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_x86.h
(_SimdImplX86::_S_divides): Replace test for __GCC_IEC_559 == 0
with __RECIPROCAL_MATH__.
---
 libstdc++-v3/include/experimental/bits/simd_x86.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 28ba344c2b2..2a3e74d9119 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -1469,7 +1469,7 @@ _CsrGuard()
 		[&__xf, &__yf](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
 		  -> _SimdWrapper<_Float, __n_intermediate>
 		{
-#if __GCC_IEC_559 == 0
+#if __RECIPROCAL_MATH__
 		  // If -freciprocal-math is active, using the `/` operator is
 		  // incorrect because it may be translated to an imprecise
 		  // multiplication with reciprocal. We need to use inline


[PATCH] libstdc++: Add missing trait is_simd_flag_type

2023-03-28 Thread Matthias Kretz via Gcc-patches
I don't know how, but I forgot to implement the is_simd_flag_type trait.
This patch adds it.

No new check-simd failures on x86_64-pc-linux-gnu

--- 8< --

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd.h (is_simd_flag_type): New.
(_IsSimdFlagType): New.
(copy_from, copy_to, load ctors): Constrain _Flags using
_IsSimdFlagType.
---
 libstdc++-v3/include/experimental/bits/simd.h | 52 ++-
 1 file changed, 39 insertions(+), 13 deletions(-)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h
index 4df446654ac..224153ffbaf 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -2837,6 +2837,32 @@ __determine_native_abi()
 } // namespace simd_abi
 
 // traits {{{1
+template <typename _Tp>
+  struct is_simd_flag_type
+  : false_type
+  {};
+
+template <>
+  struct is_simd_flag_type<element_aligned_tag>
+  : true_type
+  {};
+
+template <>
+  struct is_simd_flag_type<vector_aligned_tag>
+  : true_type
+  {};
+
+template <size_t _Np>
+  struct is_simd_flag_type<overaligned_tag<_Np>>
+  : __bool_constant<(_Np > 0) and __has_single_bit(_Np)>
+  {};
+
+template <typename _Tp>
+  inline constexpr bool is_simd_flag_type_v = is_simd_flag_type<_Tp>::value;
+
+template <typename _Tp, typename = enable_if_t<is_simd_flag_type_v<_Tp>>>
+  using _IsSimdFlagType = _Tp;
+
 // is_abi_tag {{{2
 template >
   struct is_abi_tag : false_type {};
@@ -3308,7 +3334,7 @@ __get_lvalue(const const_where_expression& __x)
 
 template 
   [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _V
-  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
+  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
   {
 	return {__private_init,
 		_Impl::_S_masked_load(__data(_M_value), __data(_M_k),
@@ -3317,7 +3343,7 @@ __get_lvalue(const const_where_expression& __x)
 
 template 
   _GLIBCXX_SIMD_INTRINSIC void
-  copy_to(_LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
+  copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
   {
 	_Impl::_S_masked_store(__data(_M_value),
 			   _Flags::template _S_apply<_V>(__mem),
@@ -3363,12 +3389,12 @@ __get_lvalue(const const_where_expression& __x)
 
 template 
   [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _V
-  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
+  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
   { return _M_k ? static_cast<_V>(__mem[0]) : _M_value; }
 
 template 
   _GLIBCXX_SIMD_INTRINSIC void
-  copy_to(_LoadStorePtr<_Up, value_type>* __mem, _Flags) const&&
+  copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
   {
 	if (_M_k)
 	  __mem[0] = _M_value;
@@ -3466,7 +3492,7 @@ static_assert(
 // intentionally hides const_where_expression::copy_from
 template 
   _GLIBCXX_SIMD_INTRINSIC void
-  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) &&
+  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
   {
 	__data(_M_value) = _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
 		 _Flags::template _S_apply<_Tp>(__mem));
@@ -3529,7 +3555,7 @@ where_expression(const _M& __kk, _Tp& dd)
 // intentionally hides const_where_expression::copy_from
 template 
   _GLIBCXX_SIMD_INTRINSIC void
-  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _Flags) &&
+  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
   { if (_M_k) _M_value = __mem[0]; }
   };
 
@@ -4574,12 +4600,12 @@ simd_mask(const simd_mask<_Up, simd_abi::fixed_size>& __x)
 // load constructor {{{
 template 
   _GLIBCXX_SIMD_ALWAYS_INLINE
-  simd_mask(const value_type* __mem, _Flags)
+  simd_mask(const value_type* __mem, _IsSimdFlagType<_Flags>)
   : _M_data(_Impl::template _S_load<_Ip>(_Flags::template _S_apply(__mem))) {}
 
 template 
   _GLIBCXX_SIMD_ALWAYS_INLINE
-  simd_mask(const value_type* __mem, simd_mask __k, _Flags)
+  simd_mask(const value_type* __mem, simd_mask __k, _IsSimdFlagType<_Flags>)
   : _M_data{}
   {
 	_M_data = _Impl::_S_masked_load(_M_data, __k._M_data,
@@ -4590,14 +4616,14 @@ simd_mask(const simd_mask<_Up, simd_abi::fixed_size>& __x)
 // loads [simd_mask.load] {{{
 template 
   _GLIBCXX_SIMD_ALWAYS_INLINE void
-  copy_from(const value_type* __mem, _Flags)
+  copy_from(const value_type* __mem, _IsSimdFlagType<_Flags>)
   { _M_data = _Impl::template _S_load<_Ip>(_Flags::template _S_apply(__mem)); }
 
 // }}}
 

[committed] libstdc++: Fix operator% implementation for Clang

2023-03-28 Thread Matthias Kretz via Gcc-patches


This resolves a regression introduced by my previous fix for the Clang
ICE on _S_divides.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_x86.h (_SimdImplX86): Use
_Base::_S_divides if the optimized _S_divides function is hidden
via the preprocessor.
---
 libstdc++-v3/include/experimental/bits/simd_x86.h | 2 ++
 1 file changed, 2 insertions(+)


--
──
 Dr. Matthias Kretz   https://mattkretz.github.io
 GSI Helmholtz Centre for Heavy Ion Research   https://gsi.de
 stdₓ::simd
──diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 2a3e74d9119..fc3e96d696c 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -1524,6 +1524,8 @@ _CsrGuard()
 	  */
 	return _Base::_S_divides(__x, __y);
   }
+#else
+using _Base::_S_divides;
 #endif // _GLIBCXX_SIMD_WORKAROUND_PR90993
 
 // }}}