[gcc r13-8704] libstdc++: Protect against macros

2024-05-07 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:0f4336c490e565523652ba276886e06b89cf2441

commit r13-8704-g0f4336c490e565523652ba276886e06b89cf2441
Author: Matthias Kretz 
Date:   Fri Jun 2 21:21:36 2023 +0200

libstdc++: Protect against macros

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd.h (__bit_cast): Use
__gnu__::__vector_size__ instead of gnu::vector_size.

(cherry picked from commit ce2188e4320cbb46d6246bd3f478ba20440c62f3)

Diff:
---
 libstdc++-v3/include/experimental/bits/simd.h | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd.h 
b/libstdc++-v3/include/experimental/bits/simd.h
index 834fe923065b..90523ea57dc7 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -1743,18 +1743,18 @@ template 
   return reinterpret_cast<_To>(__x);
 else if constexpr (__is_vector_type_v<_To> && __from_is_vectorizable)
   {
-   using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
+   using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
return reinterpret_cast<_To>(_FV{__x});
   }
 else if constexpr (__to_is_vectorizable && __from_is_vectorizable)
   {
-   using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
-   using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
+   using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
+   using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
return reinterpret_cast<_TV>(_FV{__x})[0];
   }
 else if constexpr (__to_is_vectorizable && __is_vector_type_v<_From>)
   {
-   using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
+   using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
return reinterpret_cast<_TV>(__x)[0];
   }
 else


[gcc r13-8705] libstdc++: Replace use of incorrect non-temporal store

2024-05-07 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:63b73f14be97a9fe8621ec1a523b6dd19bde72dc

commit r13-8705-g63b73f14be97a9fe8621ec1a523b6dd19bde72dc
Author: Matthias Kretz 
Date:   Fri Jun 2 13:44:22 2023 +0200

libstdc++: Replace use of incorrect non-temporal store

The call to the base implementation sometimes didn't find a matching
signature because the _Abi parameter of _SimdImpl* was "wrong" after
conversion. It has to call into the deduced ABI's _UAbi::_SimdImpl instead of
the current ABI tag's _SimdImpl. This also reduces the number of possible
template instantiations.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/110054
* include/experimental/bits/simd_builtin.h (_S_masked_store):
Call into deduced ABI's SimdImpl after conversion.
* include/experimental/bits/simd_x86.h (_S_masked_store_nocvt):
Don't use _mm_maskmoveu_si128. Use the generic fall-back
implementation. Also fix masked stores without SSE2, which
were not doing anything before.

(cherry picked from commit 27e45b7597d6fb1a71927d658a0294797b720c0a)

Diff:
---
 .../include/experimental/bits/simd_builtin.h   |  6 ++--
 libstdc++-v3/include/experimental/bits/simd_x86.h  | 38 +++---
 2 files changed, 7 insertions(+), 37 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h 
b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index 8337fa2d9a6c..64ef6efaf8ca 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -1628,7 +1628,7 @@ template 
if constexpr (_UW_size == _TV_size) // one convert+store
  {
const _UW __converted = __convert<_UW>(__v);
-   _SuperImpl::_S_masked_store_nocvt(
+   _UAbi::_SimdImpl::_S_masked_store_nocvt(
  __converted, __mem,
  _UAbi::_MaskImpl::template _S_convert<
__int_for_sizeof_t<_Up>>(__k));
@@ -1643,7 +1643,7 @@ template 
const array<_UV, _NAllStores> __converted
  = __convert_all<_UV, _NAllStores>(__v);
__execute_n_times<_NFullStores>([&](auto __i) 
_GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
- _SuperImpl::_S_masked_store_nocvt(
+ _UAbi::_SimdImpl::_S_masked_store_nocvt(
_UW(__converted[__i]), __mem + __i * _UW_size,
_UAbi::_MaskImpl::template _S_convert<
  __int_for_sizeof_t<_Up>>(
@@ -1651,7 +1651,7 @@ template 
});
if constexpr (_NAllStores
  > _NFullStores) // one partial at the end
- _SuperImpl::_S_masked_store_nocvt(
+ _UAbi::_SimdImpl::_S_masked_store_nocvt(
_UW(__converted[_NFullStores]),
__mem + _NFullStores * _UW_size,
_UAbi::_MaskImpl::template _S_convert<
diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h 
b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 77d2f84ab713..2e301e45677e 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -1106,31 +1106,6 @@ template 
else
  _mm512_mask_storeu_pd(__mem, __k, __vi);
  }
-#if 0 // with KNL either sizeof(_Tp) >= 4 or sizeof(_vi) <= 32
-  // with Skylake-AVX512, __have_avx512bw is true
- else if constexpr (__have_sse2)
-   {
- using _M   = __vector_type_t<_Tp, _Np>;
- using _MVT = _VectorTraits<_M>;
- _mm_maskmoveu_si128(__auto_bitcast(__extract<0, 4>(__v._M_data)),
- __auto_bitcast(_MaskImpl::template 
_S_convert<_Tp, _Np>(__k._M_data)),
- reinterpret_cast(__mem));
- _mm_maskmoveu_si128(__auto_bitcast(__extract<1, 4>(__v._M_data)),
- __auto_bitcast(_MaskImpl::template 
_S_convert<_Tp, _Np>(
-   __k._M_data >> 1 * _MVT::_S_full_size)),
- reinterpret_cast(__mem) + 1 * 16);
- _mm_maskmoveu_si128(__auto_bitcast(__extract<2, 4>(__v._M_data)),
- __auto_bitcast(_MaskImpl::template 
_S_convert<_Tp, _Np>(
-   __k._M_data >> 2 * _MVT::_S_full_size)),
- reinterpret_cast(__mem) + 2 * 16);
- if constexpr (_Np > 48 / sizeof(_Tp))
-   _mm_maskmoveu_si128(
- __auto_bitcast(__extract<3, 4>(__v._M_data)),
- __auto_bitcast(_MaskImpl::template _S_convert<_Tp, _Np>(
-   __k._M_data >> 3 * _MVT::_S_full_size)),
- reinterpret_cast(__mem) + 3 * 16);
-   }
-#endif
else
  __asse

[gcc r13-8708] libstdc++: Add masked ++/-- implementation for sizeof < 16

2024-05-07 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:916f3b3802bf84083142149165da95c113d5d2e2

commit r13-8708-g916f3b3802bf84083142149165da95c113d5d2e2
Author: Matthias Kretz 
Date:   Wed Mar 27 13:41:25 2024 +0100

libstdc++: Add masked ++/-- implementation for sizeof < 16

This resolves further failures (-Wreturn-type warnings) and test
failures for where-* tests targeting AVX-512.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_x86.h (_S_masked_unary):
Cast inputs < 16 bytes to 16 byte vectors before calling the
right subtraction builtin. Before returning, truncate to the
return vector type.

(cherry picked from commit a6c630c314b099f64d79055964d88b257459cf13)

Diff:
---
 libstdc++-v3/include/experimental/bits/simd_x86.h | 24 +--
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h 
b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 81f46adb8ca2..79c246455ee4 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -3508,6 +3508,9 @@ template 
 #ifdef __clang__
return __movm<_Np, _Tp>(__k._M_data) ? __v._M_data - __pm_one : 
__v._M_data;
 #else // __clang__
+   using _TV = __vector_type_t<_Tp, _Np>;
+   constexpr size_t __bytes = sizeof(__v) < 16 ? 16 : sizeof(__v);
+   constexpr size_t __width = __bytes / sizeof(_Tp);
if constexpr (is_integral_v<_Tp>)
  {
constexpr bool __lp64 = sizeof(long) == sizeof(long long);
@@ -3517,11 +3520,11 @@ template 
  std::conditional_t<__lp64, long long, int>,
  std::conditional_t<
std::is_same_v<_Ip, signed char>, char, _Ip>>;
-   const auto __value = __vector_bitcast<_Up>(__v._M_data);
+   const auto __value = __intrin_bitcast<__vector_type_t<_Up, 
__width>>(__v._M_data);
 #define _GLIBCXX_SIMD_MASK_SUB(_Sizeof, _Width, _Instr)
\
-  if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width)   
\
-return __vector_bitcast<_Tp>(__builtin_ia32_##_Instr##_mask(__value,   
\
-__vector_broadcast<_Np>(_Up(__pm_one)), __value, __k._M_data))
+  if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__value) == _Width)   
\
+return __intrin_bitcast<_TV>(__builtin_ia32_##_Instr##_mask(__value,   
\
+__vector_broadcast<__width>(_Up(__pm_one)), __value, __k._M_data))
_GLIBCXX_SIMD_MASK_SUB(1, 64, psubb512);
_GLIBCXX_SIMD_MASK_SUB(1, 32, psubb256);
_GLIBCXX_SIMD_MASK_SUB(1, 16, psubb128);
@@ -3538,16 +3541,17 @@ template 
  }
else
  {
+   const auto __value = __intrin_bitcast<__vector_type_t<_Tp, 
__width>>(__v._M_data);
 #define _GLIBCXX_SIMD_MASK_SUB_512(_Sizeof, _Width, _Instr)
\
-  if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width)   
\
+  if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__value) == _Width)   
\
 return __builtin_ia32_##_Instr##_mask( 
\
-__v._M_data, __vector_broadcast<_Np>(_Tp(__pm_one)), __v._M_data, \
+__value, __vector_broadcast<__width>(_Tp(__pm_one)), __value, \
 __k._M_data, _MM_FROUND_CUR_DIRECTION)
 #define _GLIBCXX_SIMD_MASK_SUB(_Sizeof, _Width, _Instr)
\
-  if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width)   
\
-return __builtin_ia32_##_Instr##_mask( 
\
-__v._M_data, __vector_broadcast<_Np>(_Tp(__pm_one)), __v._M_data, \
-__k._M_data)
+  if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__value) == _Width)   
\
+return __intrin_bitcast<_TV>(__builtin_ia32_##_Instr##_mask(   
\
+__value, __vector_broadcast<__width>(_Tp(__pm_one)), __value, \
+__k._M_data))
_GLIBCXX_SIMD_MASK_SUB_512(4, 64, subps512);
_GLIBCXX_SIMD_MASK_SUB(4, 32, subps256);
_GLIBCXX_SIMD_MASK_SUB(4, 16, subps128);


[gcc r13-8712] libstdc++: Fix conversion of simd to vector builtin

2024-05-07 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:6663f2a04c16288124d5ef918c661260e4f66284

commit r13-8712-g6663f2a04c16288124d5ef918c661260e4f66284
Author: Matthias Kretz 
Date:   Mon Apr 22 16:12:34 2024 +0200

libstdc++: Fix conversion of simd to vector builtin

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/114803
* include/experimental/bits/simd_builtin.h
(_SimdBase2::operator __vector_type_t): There is no __builtin()
function in _SimdWrapper, instead use its conversion operator.
* testsuite/experimental/simd/pr114803_vecbuiltin_cvt.cc: New
test.

(cherry picked from commit 7ef139146a8923a8719873ca3fdae175668e8d63)

Diff:
---
 .../include/experimental/bits/simd_builtin.h   |   2 +-
 .../experimental/simd/pr114803_vecbuiltin_cvt.cc   | 105 +
 2 files changed, 106 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h 
b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index 66ece9679b0c..2fa1cf487501 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -841,7 +841,7 @@ template 
 
   _GLIBCXX_SIMD_ALWAYS_INLINE explicit
   operator __vector_type_t<_Tp, _Np>() const
-  { return static_cast*>(this)->_M_data.__builtin(); 
}
+  { return __data(*static_cast*>(this)); }
 };
 
 struct _SimdBase1
diff --git 
a/libstdc++-v3/testsuite/experimental/simd/pr114803_vecbuiltin_cvt.cc 
b/libstdc++-v3/testsuite/experimental/simd/pr114803_vecbuiltin_cvt.cc
new file mode 100644
index ..103dd19394c1
--- /dev/null
+++ b/libstdc++-v3/testsuite/experimental/simd/pr114803_vecbuiltin_cvt.cc
@@ -0,0 +1,105 @@
+// { dg-options "-std=gnu++17" }
+// { dg-do compile { target c++17 } }
+
+#include 
+
+template 
+  void
+  maybe_test()
+  {
+using V = std::experimental::simd>;
+if constexpr (std::is_destructible_v)
+  {
+   using V2 [[gnu::vector_size(16)]] = T;
+   V x = {};
+   V2 x2 = static_cast(x);
+   x = static_cast(x2);
+   for (unsigned i = 0; i < V::size(); ++i)
+ {
+   if (x2[i] != 0)
+ __builtin_abort();
+ }
+#ifdef __SSE__
+   if constexpr (std::is_same_v)
+ x = static_cast(static_cast<__m128>(x));
+   else if constexpr (std::is_same_v)
+ x = static_cast(static_cast<__m128d>(x));
+   else if constexpr (std::is_integral_v)
+ x = static_cast(static_cast<__m128i>(x));
+#elif __ALTIVEC__
+   if constexpr (std::is_same_v)
+ x = static_cast(static_cast<__vector float>(x));
+#ifdef __VSX__
+   else if constexpr (std::is_same_v)
+ x = static_cast(static_cast<__vector double>(x));
+#endif
+   else if constexpr (std::is_integral_v && sizeof(T) == sizeof(signed 
char)
+&& std::is_signed_v)
+ x = static_cast(static_cast<__vector signed char>(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == sizeof(signed 
char))
+ x = static_cast(static_cast<__vector unsigned char>(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == sizeof(short)
+&& std::is_signed_v)
+ x = static_cast(static_cast<__vector signed short>(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == sizeof(short))
+ x = static_cast(static_cast<__vector unsigned short>(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == sizeof(int)
+&& std::is_signed_v)
+ x = static_cast(static_cast<__vector signed int>(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == sizeof(int))
+ x = static_cast(static_cast<__vector unsigned int>(x));
+#ifdef __VSX__
+   else if constexpr (std::is_integral_v && sizeof(T) == sizeof(long 
long)
+&& std::is_signed_v)
+ x = static_cast(static_cast<__vector signed long long>(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == sizeof(long 
long))
+ x = static_cast(static_cast<__vector unsigned long long>(x));
+#endif
+#elif __ARM_NEON
+   if constexpr (std::is_same_v)
+ x = static_cast(static_cast(x));
+#ifdef __aarch64__
+   else if constexpr (std::is_same_v)
+ x = static_cast(static_cast(x));
+#endif
+   else if constexpr (std::is_integral_v && sizeof(T) == 1 && 
std::is_signed_v)
+ x = static_cast(static_cast(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == 1)
+ x = static_cast(static_cast(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == 2 && 
std::is_signed_v)
+ x = static_cast(static_cast(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == 2)
+ x = static_cast(static_cast(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == 4 && 
std::is_signed_v)
+ x 

[gcc r13-8707] libstdc++: Fix call signature of builtins from masked ++/--

2024-05-07 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:a46218ab09c2b5cd0028d202780d076693abbfe8

commit r13-8707-ga46218ab09c2b5cd0028d202780d076693abbfe8
Author: Matthias Kretz 
Date:   Wed Mar 27 08:49:43 2024 +0100

libstdc++: Fix call signature of builtins from masked ++/--

This resolves failures in the "expensive" where-* test of check-simd
when targeting AVX-512.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_x86.h (_S_masked_unary): Call
the 4- and 8-byte variants of __builtin_ia32_subp[ds] without
rounding direction argument.

(cherry picked from commit 0ac2c0f0687b321ab54de271d788b4e0a287b4e2)

Diff:
---
 libstdc++-v3/include/experimental/bits/simd_x86.h | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h 
b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 2e301e45677e..81f46adb8ca2 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -3538,17 +3538,23 @@ template 
  }
else
  {
-#define _GLIBCXX_SIMD_MASK_SUB(_Sizeof, _Width, _Instr)
\
+#define _GLIBCXX_SIMD_MASK_SUB_512(_Sizeof, _Width, _Instr)
\
   if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width)   
\
 return __builtin_ia32_##_Instr##_mask( 
\
 __v._M_data, __vector_broadcast<_Np>(_Tp(__pm_one)), __v._M_data, \
 __k._M_data, _MM_FROUND_CUR_DIRECTION)
-   _GLIBCXX_SIMD_MASK_SUB(4, 64, subps512);
+#define _GLIBCXX_SIMD_MASK_SUB(_Sizeof, _Width, _Instr)
\
+  if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width)   
\
+return __builtin_ia32_##_Instr##_mask( 
\
+__v._M_data, __vector_broadcast<_Np>(_Tp(__pm_one)), __v._M_data, \
+__k._M_data)
+   _GLIBCXX_SIMD_MASK_SUB_512(4, 64, subps512);
_GLIBCXX_SIMD_MASK_SUB(4, 32, subps256);
_GLIBCXX_SIMD_MASK_SUB(4, 16, subps128);
-   _GLIBCXX_SIMD_MASK_SUB(8, 64, subpd512);
+   _GLIBCXX_SIMD_MASK_SUB_512(8, 64, subpd512);
_GLIBCXX_SIMD_MASK_SUB(8, 32, subpd256);
_GLIBCXX_SIMD_MASK_SUB(8, 16, subpd128);
+#undef _GLIBCXX_SIMD_MASK_SUB_512
 #undef _GLIBCXX_SIMD_MASK_SUB
  }
 #endif // __clang__


[gcc r13-8711] libstdc++: Silence irrelevant warnings in <experimental/simd>

2024-05-07 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:83636ae013d55047c05ed9ce66390aefb02f0915

commit r13-8711-g83636ae013d55047c05ed9ce66390aefb02f0915
Author: Matthias Kretz 
Date:   Wed Apr 17 10:35:47 2024 +0200

libstdc++: Silence irrelevant warnings in <experimental/simd>

Avoid
-Wnarrowing in C code;
-Wtautological-compare in unconditional static_assert (necessary for
faking a dependency on a template parameter)

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd.h: Ignore -Wnarrowing for
arm_neon.h.
(__int_for_sizeof): Replace tautological compare with checking
for invalid template parameter value.
* include/experimental/bits/simd_builtin.h (__extract_part):
Remove tautological compare by combining two static_assert.

(cherry picked from commit e7a3ad29c9c832b6ae999cbfb0af89e121959030)

Diff:
---
 libstdc++-v3/include/experimental/bits/simd.h | 8 +++-
 libstdc++-v3/include/experimental/bits/simd_builtin.h | 3 +--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd.h 
b/libstdc++-v3/include/experimental/bits/simd.h
index 90523ea57dc7..63cc7bef610b 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -43,7 +43,12 @@
 #if _GLIBCXX_SIMD_X86INTRIN
 #include 
 #elif _GLIBCXX_SIMD_HAVE_NEON
+#pragma GCC diagnostic push
+// narrowing conversion of '__a' from 'uint64_t' {aka 'long long unsigned 
int'} to
+//   'int64x1_t' {aka 'long long int'} [-Wnarrowing]
+#pragma GCC diagnostic ignored "-Wnarrowing"
 #include 
+#pragma GCC diagnostic pop
 #endif
 
 /** @ingroup ts_simd
@@ -576,6 +581,7 @@ template 
   constexpr auto
   __int_for_sizeof()
   {
+static_assert(_Bytes > 0);
 if constexpr (_Bytes == sizeof(int))
   return int();
   #ifdef __clang__
@@ -641,7 +647,7 @@ template 
return _Ip{};
   }
 else
-  static_assert(_Bytes != _Bytes, "this should be unreachable");
+  static_assert(_Bytes == 0, "this should be unreachable");
   }
 #pragma GCC diagnostic pop
 
diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h 
b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index e6750fea909e..66ece9679b0c 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -278,8 +278,7 @@ template 
   __extract_part(const _SimdWrapper __x)
   {
 static_assert(_Combine == 1, "_Combine != 1 not implemented");
-static_assert(__have_avx512f && _Np == _Np);
-static_assert(_Total >= 2 && _Index + _Combine <= _Total && _Index >= 0);
+static_assert(__have_avx512f && _Total >= 2 && _Index + _Combine <= _Total 
&& _Index >= 0);
 return __x._M_data >> (_Index * _Np / _Total);
   }


[gcc r13-8706] libstdc++: Avoid vector casts while still avoiding PR90424

2024-05-07 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:4d7a770f2d1dc9d7e7b79788c201ba22bd52cf06

commit r13-8706-g4d7a770f2d1dc9d7e7b79788c201ba22bd52cf06
Author: Matthias Kretz 
Date:   Fri Jun 2 21:33:04 2023 +0200

libstdc++: Avoid vector casts while still avoiding PR90424

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/109822
* include/experimental/bits/simd_builtin.h (_S_store): Rewrite
to avoid casts to other vector types. Implement store as
succession of power-of-2 sized memcpy to avoid PR90424.

(cherry picked from commit 9165ede56ababd6471e7a2ce4eab30f3d5129e14)

Diff:
---
 .../include/experimental/bits/simd_builtin.h   | 40 --
 1 file changed, 15 insertions(+), 25 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h 
b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index 64ef6efaf8ca..6ccc2fcec9c8 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -1295,6 +1295,18 @@ struct _CommonImplBuiltin
 
   // }}}
   // _S_store {{{
+  template 
+_GLIBCXX_SIMD_INTRINSIC static void
+_S_memcpy(char* __dst, const char* __src)
+{
+  if constexpr (_Bytes > 0)
+   {
+ constexpr size_t _Ns = std::__bit_floor(_Bytes);
+ __builtin_memcpy(__dst, __src, _Ns);
+ _S_memcpy<_Bytes - _Ns>(__dst + _Ns, __src + _Ns);
+   }
+}
+
   template 
 _GLIBCXX_SIMD_INTRINSIC static void
 _S_store(_TV __x, void* __addr)
@@ -1302,33 +1314,11 @@ struct _CommonImplBuiltin
   constexpr size_t _Bytes = _ReqBytes == 0 ? sizeof(__x) : _ReqBytes;
   static_assert(sizeof(__x) >= _Bytes);
 
+#if !defined __clang__ && _GLIBCXX_SIMD_WORKAROUND_PR90424
   if constexpr (__is_vector_type_v<_TV>)
-   {
- using _Tp = typename _VectorTraits<_TV>::value_type;
- constexpr size_t _Np = _Bytes / sizeof(_Tp);
- static_assert(_Np * sizeof(_Tp) == _Bytes);
-
-#ifdef _GLIBCXX_SIMD_WORKAROUND_PR90424
- using _Up = conditional_t<
-   (is_integral_v<_Tp> || _Bytes < 4),
-   conditional_t<(sizeof(__x) > sizeof(long long)), long long, _Tp>,
-   float>;
- const auto __v = __vector_bitcast<_Up>(__x);
-#else // _GLIBCXX_SIMD_WORKAROUND_PR90424
- const __vector_type_t<_Tp, _Np> __v = __x;
-#endif // _GLIBCXX_SIMD_WORKAROUND_PR90424
-
- if constexpr ((_Bytes & (_Bytes - 1)) != 0)
-   {
- constexpr size_t _MoreBytes = std::__bit_ceil(_Bytes);
- alignas(decltype(__v)) char __tmp[_MoreBytes];
- __builtin_memcpy(__tmp, &__v, _MoreBytes);
- __builtin_memcpy(__addr, __tmp, _Bytes);
-   }
- else
-   __builtin_memcpy(__addr, &__v, _Bytes);
-   }
+   _S_memcpy<_Bytes>(reinterpret_cast(__addr), 
reinterpret_cast(&__x));
   else
+#endif // _GLIBCXX_SIMD_WORKAROUND_PR90424
__builtin_memcpy(__addr, &__x, _Bytes);
 }


[gcc r13-8709] libstdc++: Avoid ill-formed types on ARM

2024-05-07 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:4505045dadaa58b0f165479d038d8f0247aea8cc

commit r13-8709-g4505045dadaa58b0f165479d038d8f0247aea8cc
Author: Matthias Kretz 
Date:   Wed Apr 17 09:11:25 2024 +0200

libstdc++: Avoid ill-formed types on ARM

This resolves failing tests in check-simd.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/114750
* include/experimental/bits/simd_builtin.h
(_SimdImplBuiltin::_S_load, _S_store): Fall back to copying
scalars if the memory type cannot be vectorized for the target.

(cherry picked from commit 0fc7f3c6adc8543f55ec35b309016d9d9c4ddd35)

Diff:
---
 libstdc++-v3/include/experimental/bits/simd_builtin.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h 
b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index 6ccc2fcec9c8..e6750fea909e 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -1464,7 +1464,7 @@ template 
   [&](auto __i) constexpr {
 return static_cast<_Tp>(__i < _Np ? __mem[__i] : 0);
   });
-   else if constexpr (sizeof(_Up) > 8)
+   else if constexpr (sizeof(_Up) > 8 or __vectorized_sizeof<_Up>() <= 
sizeof(_Up))
  return __generate_vector<_Tp, _SimdMember<_Tp>::_S_full_size>(
   [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
 return static_cast<_Tp>(__i < _Np ? __mem[__i] : 0);
@@ -1536,7 +1536,7 @@ template 
for (size_t __i = 0; __i < _Np; ++__i)
  __mem[__i] = __v[__i];
  }
-   else if constexpr (sizeof(_Up) > 8)
+   else if constexpr (sizeof(_Up) > 8 or __vectorized_sizeof<_Up>() <= 
sizeof(_Up))
  __execute_n_times<_Np>([&](auto __i) constexpr 
_GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
__mem[__i] = __v[__i];
  });


[gcc r12-10422] libstdc++: Replace use of incorrect non-temporal store

2024-05-08 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:e60ec9b0e02e8647c289d204342e992e91750011

commit r12-10422-ge60ec9b0e02e8647c289d204342e992e91750011
Author: Matthias Kretz 
Date:   Fri Jun 2 13:44:22 2023 +0200

libstdc++: Replace use of incorrect non-temporal store

The call to the base implementation sometimes didn't find a matching
signature because the _Abi parameter of _SimdImpl* was "wrong" after
conversion. It has to call into the deduced ABI's _UAbi::_SimdImpl instead of
the current ABI tag's _SimdImpl. This also reduces the number of possible
template instantiations.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/110054
* include/experimental/bits/simd_builtin.h (_S_masked_store):
Call into deduced ABI's SimdImpl after conversion.
* include/experimental/bits/simd_x86.h (_S_masked_store_nocvt):
Don't use _mm_maskmoveu_si128. Use the generic fall-back
implementation. Also fix masked stores without SSE2, which
were not doing anything before.

(cherry picked from commit 27e45b7597d6fb1a71927d658a0294797b720c0a)

Diff:
---
 .../include/experimental/bits/simd_builtin.h   |  6 ++--
 libstdc++-v3/include/experimental/bits/simd_x86.h  | 38 +++---
 2 files changed, 7 insertions(+), 37 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h 
b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index 9ea6259bfda2..8923a82da39e 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -1628,7 +1628,7 @@ template 
if constexpr (_UW_size == _TV_size) // one convert+store
  {
const _UW __converted = __convert<_UW>(__v);
-   _SuperImpl::_S_masked_store_nocvt(
+   _UAbi::_SimdImpl::_S_masked_store_nocvt(
  __converted, __mem,
  _UAbi::_MaskImpl::template _S_convert<
__int_for_sizeof_t<_Up>>(__k));
@@ -1643,7 +1643,7 @@ template 
const array<_UV, _NAllStores> __converted
  = __convert_all<_UV, _NAllStores>(__v);
__execute_n_times<_NFullStores>([&](auto __i) 
_GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
- _SuperImpl::_S_masked_store_nocvt(
+ _UAbi::_SimdImpl::_S_masked_store_nocvt(
_UW(__converted[__i]), __mem + __i * _UW_size,
_UAbi::_MaskImpl::template _S_convert<
  __int_for_sizeof_t<_Up>>(
@@ -1651,7 +1651,7 @@ template 
});
if constexpr (_NAllStores
  > _NFullStores) // one partial at the end
- _SuperImpl::_S_masked_store_nocvt(
+ _UAbi::_SimdImpl::_S_masked_store_nocvt(
_UW(__converted[_NFullStores]),
__mem + _NFullStores * _UW_size,
_UAbi::_MaskImpl::template _S_convert<
diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h 
b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 557462893964..03febe7044c9 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -1106,31 +1106,6 @@ template 
else
  _mm512_mask_storeu_pd(__mem, __k, __vi);
  }
-#if 0 // with KNL either sizeof(_Tp) >= 4 or sizeof(_vi) <= 32
-  // with Skylake-AVX512, __have_avx512bw is true
- else if constexpr (__have_sse2)
-   {
- using _M   = __vector_type_t<_Tp, _Np>;
- using _MVT = _VectorTraits<_M>;
- _mm_maskmoveu_si128(__auto_bitcast(__extract<0, 4>(__v._M_data)),
- __auto_bitcast(_MaskImpl::template 
_S_convert<_Tp, _Np>(__k._M_data)),
- reinterpret_cast(__mem));
- _mm_maskmoveu_si128(__auto_bitcast(__extract<1, 4>(__v._M_data)),
- __auto_bitcast(_MaskImpl::template 
_S_convert<_Tp, _Np>(
-   __k._M_data >> 1 * _MVT::_S_full_size)),
- reinterpret_cast(__mem) + 1 * 16);
- _mm_maskmoveu_si128(__auto_bitcast(__extract<2, 4>(__v._M_data)),
- __auto_bitcast(_MaskImpl::template 
_S_convert<_Tp, _Np>(
-   __k._M_data >> 2 * _MVT::_S_full_size)),
- reinterpret_cast(__mem) + 2 * 16);
- if constexpr (_Np > 48 / sizeof(_Tp))
-   _mm_maskmoveu_si128(
- __auto_bitcast(__extract<3, 4>(__v._M_data)),
- __auto_bitcast(_MaskImpl::template _S_convert<_Tp, _Np>(
-   __k._M_data >> 3 * _MVT::_S_full_size)),
- reinterpret_cast(__mem) + 3 * 16);
-   }
-#endif
else
  __ass

[gcc r12-10423] libstdc++: Avoid vector casts while still avoiding PR90424

2024-05-08 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:44df51e5826747d089a08fa1a1378454a8d2d0de

commit r12-10423-g44df51e5826747d089a08fa1a1378454a8d2d0de
Author: Matthias Kretz 
Date:   Fri Jun 2 21:33:04 2023 +0200

libstdc++: Avoid vector casts while still avoiding PR90424

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/109822
* include/experimental/bits/simd_builtin.h (_S_store): Rewrite
to avoid casts to other vector types. Implement store as
succession of power-of-2 sized memcpy to avoid PR90424.

(cherry picked from commit 9165ede56ababd6471e7a2ce4eab30f3d5129e14)

Diff:
---
 .../include/experimental/bits/simd_builtin.h   | 40 --
 1 file changed, 15 insertions(+), 25 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h 
b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index 8923a82da39e..51034fec6931 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -1295,6 +1295,18 @@ struct _CommonImplBuiltin
 
   // }}}
   // _S_store {{{
+  template 
+_GLIBCXX_SIMD_INTRINSIC static void
+_S_memcpy(char* __dst, const char* __src)
+{
+  if constexpr (_Bytes > 0)
+   {
+ constexpr size_t _Ns = std::__bit_floor(_Bytes);
+ __builtin_memcpy(__dst, __src, _Ns);
+ _S_memcpy<_Bytes - _Ns>(__dst + _Ns, __src + _Ns);
+   }
+}
+
   template 
 _GLIBCXX_SIMD_INTRINSIC static void
 _S_store(_TV __x, void* __addr)
@@ -1302,33 +1314,11 @@ struct _CommonImplBuiltin
   constexpr size_t _Bytes = _ReqBytes == 0 ? sizeof(__x) : _ReqBytes;
   static_assert(sizeof(__x) >= _Bytes);
 
+#if !defined __clang__ && _GLIBCXX_SIMD_WORKAROUND_PR90424
   if constexpr (__is_vector_type_v<_TV>)
-   {
- using _Tp = typename _VectorTraits<_TV>::value_type;
- constexpr size_t _Np = _Bytes / sizeof(_Tp);
- static_assert(_Np * sizeof(_Tp) == _Bytes);
-
-#ifdef _GLIBCXX_SIMD_WORKAROUND_PR90424
- using _Up = conditional_t<
-   (is_integral_v<_Tp> || _Bytes < 4),
-   conditional_t<(sizeof(__x) > sizeof(long long)), long long, _Tp>,
-   float>;
- const auto __v = __vector_bitcast<_Up>(__x);
-#else // _GLIBCXX_SIMD_WORKAROUND_PR90424
- const __vector_type_t<_Tp, _Np> __v = __x;
-#endif // _GLIBCXX_SIMD_WORKAROUND_PR90424
-
- if constexpr ((_Bytes & (_Bytes - 1)) != 0)
-   {
- constexpr size_t _MoreBytes = std::__bit_ceil(_Bytes);
- alignas(decltype(__v)) char __tmp[_MoreBytes];
- __builtin_memcpy(__tmp, &__v, _MoreBytes);
- __builtin_memcpy(__addr, __tmp, _Bytes);
-   }
- else
-   __builtin_memcpy(__addr, &__v, _Bytes);
-   }
+   _S_memcpy<_Bytes>(reinterpret_cast(__addr), 
reinterpret_cast(&__x));
   else
+#endif // _GLIBCXX_SIMD_WORKAROUND_PR90424
__builtin_memcpy(__addr, &__x, _Bytes);
 }


[gcc r12-10424] libstdc++: Fix call signature of builtins from masked ++/--

2024-05-08 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:7e40721ebd07317aa5e8704549401ba4bcde2e2f

commit r12-10424-g7e40721ebd07317aa5e8704549401ba4bcde2e2f
Author: Matthias Kretz 
Date:   Wed Mar 27 08:49:43 2024 +0100

libstdc++: Fix call signature of builtins from masked ++/--

This resolves failures in the "expensive" where-* test of check-simd
when targeting AVX-512.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_x86.h (_S_masked_unary): Call
the 4- and 8-byte variants of __builtin_ia32_subp[ds] without
rounding direction argument.

(cherry picked from commit 0ac2c0f0687b321ab54de271d788b4e0a287b4e2)

Diff:
---
 libstdc++-v3/include/experimental/bits/simd_x86.h | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h 
b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 03febe7044c9..90b7fca26e2b 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -3538,17 +3538,23 @@ template 
  }
else
  {
-#define _GLIBCXX_SIMD_MASK_SUB(_Sizeof, _Width, _Instr)
\
+#define _GLIBCXX_SIMD_MASK_SUB_512(_Sizeof, _Width, _Instr)
\
   if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width)   
\
 return __builtin_ia32_##_Instr##_mask( 
\
 __v._M_data, __vector_broadcast<_Np>(_Tp(__pm_one)), __v._M_data, \
 __k._M_data, _MM_FROUND_CUR_DIRECTION)
-   _GLIBCXX_SIMD_MASK_SUB(4, 64, subps512);
+#define _GLIBCXX_SIMD_MASK_SUB(_Sizeof, _Width, _Instr)
\
+  if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width)   
\
+return __builtin_ia32_##_Instr##_mask( 
\
+__v._M_data, __vector_broadcast<_Np>(_Tp(__pm_one)), __v._M_data, \
+__k._M_data)
+   _GLIBCXX_SIMD_MASK_SUB_512(4, 64, subps512);
_GLIBCXX_SIMD_MASK_SUB(4, 32, subps256);
_GLIBCXX_SIMD_MASK_SUB(4, 16, subps128);
-   _GLIBCXX_SIMD_MASK_SUB(8, 64, subpd512);
+   _GLIBCXX_SIMD_MASK_SUB_512(8, 64, subpd512);
_GLIBCXX_SIMD_MASK_SUB(8, 32, subpd256);
_GLIBCXX_SIMD_MASK_SUB(8, 16, subpd128);
+#undef _GLIBCXX_SIMD_MASK_SUB_512
 #undef _GLIBCXX_SIMD_MASK_SUB
  }
 #endif // __clang__


[gcc r12-10426] libstdc++: Avoid ill-formed types on ARM

2024-05-08 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:b3097e01cd5d32e9825b7b87dff12b3247bd9819

commit r12-10426-gb3097e01cd5d32e9825b7b87dff12b3247bd9819
Author: Matthias Kretz 
Date:   Wed Apr 17 09:11:25 2024 +0200

libstdc++: Avoid ill-formed types on ARM

This resolves failing tests in check-simd.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/114750
* include/experimental/bits/simd_builtin.h
(_SimdImplBuiltin::_S_load, _S_store): Fall back to copying
scalars if the memory type cannot be vectorized for the target.

(cherry picked from commit 0fc7f3c6adc8543f55ec35b309016d9d9c4ddd35)

Diff:
---
 libstdc++-v3/include/experimental/bits/simd_builtin.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h 
b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index 51034fec6931..e060816c6eac 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -1464,7 +1464,7 @@ template 
   [&](auto __i) constexpr {
 return static_cast<_Tp>(__i < _Np ? __mem[__i] : 0);
   });
-   else if constexpr (sizeof(_Up) > 8)
+   else if constexpr (sizeof(_Up) > 8 or __vectorized_sizeof<_Up>() <= 
sizeof(_Up))
  return __generate_vector<_Tp, _SimdMember<_Tp>::_S_full_size>(
   [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
 return static_cast<_Tp>(__i < _Np ? __mem[__i] : 0);
@@ -1536,7 +1536,7 @@ template 
for (size_t __i = 0; __i < _Np; ++__i)
  __mem[__i] = __v[__i];
  }
-   else if constexpr (sizeof(_Up) > 8)
+   else if constexpr (sizeof(_Up) > 8 or __vectorized_sizeof<_Up>() <= 
sizeof(_Up))
  __execute_n_times<_Np>([&](auto __i) constexpr 
_GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
__mem[__i] = __v[__i];
  });


[gcc r12-10428] libstdc++: Silence irrelevant warnings in <experimental/simd>

2024-05-08 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:79aa69663cfbac30b76c85a8e4856b0865749ef7

commit r12-10428-g79aa69663cfbac30b76c85a8e4856b0865749ef7
Author: Matthias Kretz 
Date:   Wed Apr 17 10:35:47 2024 +0200

libstdc++: Silence irrelevant warnings in <experimental/simd>

Avoid
-Wnarrowing in C code;
-Wtautological-compare in unconditional static_assert (necessary for
faking a dependency on a template parameter)

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd.h: Ignore -Wnarrowing for
arm_neon.h.
(__int_for_sizeof): Replace tautological compare with checking
for invalid template parameter value.
* include/experimental/bits/simd_builtin.h (__extract_part):
Remove tautological compare by combining two static_assert.

(cherry picked from commit e7a3ad29c9c832b6ae999cbfb0af89e121959030)

Diff:
---
 libstdc++-v3/include/experimental/bits/simd.h | 8 +++-
 libstdc++-v3/include/experimental/bits/simd_builtin.h | 3 +--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd.h 
b/libstdc++-v3/include/experimental/bits/simd.h
index 6f7418a840ea..439545869be8 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -43,7 +43,12 @@
 #if _GLIBCXX_SIMD_X86INTRIN
 #include 
 #elif _GLIBCXX_SIMD_HAVE_NEON
+#pragma GCC diagnostic push
+// narrowing conversion of '__a' from 'uint64_t' {aka 'long long unsigned 
int'} to
+//   'int64x1_t' {aka 'long long int'} [-Wnarrowing]
+#pragma GCC diagnostic ignored "-Wnarrowing"
 #include 
+#pragma GCC diagnostic pop
 #endif
 
 /** @ingroup ts_simd
@@ -576,6 +581,7 @@ template 
   constexpr auto
   __int_for_sizeof()
   {
+static_assert(_Bytes > 0);
 if constexpr (_Bytes == sizeof(int))
   return int();
   #ifdef __clang__
@@ -641,7 +647,7 @@ template 
return _Ip{};
   }
 else
-  static_assert(_Bytes != _Bytes, "this should be unreachable");
+  static_assert(_Bytes == 0, "this should be unreachable");
   }
 #pragma GCC diagnostic pop
 
diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h 
b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index e060816c6eac..fc8937a6e024 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -278,8 +278,7 @@ template 
   __extract_part(const _SimdWrapper __x)
   {
 static_assert(_Combine == 1, "_Combine != 1 not implemented");
-static_assert(__have_avx512f && _Np == _Np);
-static_assert(_Total >= 2 && _Index + _Combine <= _Total && _Index >= 0);
+static_assert(__have_avx512f && _Total >= 2 && _Index + _Combine <= _Total 
&& _Index >= 0);
 return __x._M_data >> (_Index * _Np / _Total);
   }


[gcc r12-10429] libstdc++: Fix conversion of simd to vector builtin

2024-05-08 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:0ab30fb865e6465156f7db10506b56ef2df19648

commit r12-10429-g0ab30fb865e6465156f7db10506b56ef2df19648
Author: Matthias Kretz 
Date:   Mon Apr 22 16:12:34 2024 +0200

libstdc++: Fix conversion of simd to vector builtin

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/114803
* include/experimental/bits/simd_builtin.h
(_SimdBase2::operator __vector_type_t): There is no __builtin()
function in _SimdWrapper, instead use its conversion operator.
* testsuite/experimental/simd/pr114803_vecbuiltin_cvt.cc: New
test.

(cherry picked from commit 7ef139146a8923a8719873ca3fdae175668e8d63)

Diff:
---
 .../include/experimental/bits/simd_builtin.h   |   2 +-
 .../experimental/simd/pr114803_vecbuiltin_cvt.cc   | 105 +
 2 files changed, 106 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h 
b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index fc8937a6e024..57a5640643da 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -841,7 +841,7 @@ template 
 
   _GLIBCXX_SIMD_ALWAYS_INLINE explicit
   operator __vector_type_t<_Tp, _Np>() const
-  { return static_cast*>(this)->_M_data.__builtin(); 
}
+  { return __data(*static_cast*>(this)); }
 };
 
 struct _SimdBase1
diff --git 
a/libstdc++-v3/testsuite/experimental/simd/pr114803_vecbuiltin_cvt.cc 
b/libstdc++-v3/testsuite/experimental/simd/pr114803_vecbuiltin_cvt.cc
new file mode 100644
index ..103dd19394c1
--- /dev/null
+++ b/libstdc++-v3/testsuite/experimental/simd/pr114803_vecbuiltin_cvt.cc
@@ -0,0 +1,105 @@
+// { dg-options "-std=gnu++17" }
+// { dg-do compile { target c++17 } }
+
+#include 
+
+template 
+  void
+  maybe_test()
+  {
+using V = std::experimental::simd>;
+if constexpr (std::is_destructible_v)
+  {
+   using V2 [[gnu::vector_size(16)]] = T;
+   V x = {};
+   V2 x2 = static_cast(x);
+   x = static_cast(x2);
+   for (unsigned i = 0; i < V::size(); ++i)
+ {
+   if (x2[i] != 0)
+ __builtin_abort();
+ }
+#ifdef __SSE__
+   if constexpr (std::is_same_v)
+ x = static_cast(static_cast<__m128>(x));
+   else if constexpr (std::is_same_v)
+ x = static_cast(static_cast<__m128d>(x));
+   else if constexpr (std::is_integral_v)
+ x = static_cast(static_cast<__m128i>(x));
+#elif __ALTIVEC__
+   if constexpr (std::is_same_v)
+ x = static_cast(static_cast<__vector float>(x));
+#ifdef __VSX__
+   else if constexpr (std::is_same_v)
+ x = static_cast(static_cast<__vector double>(x));
+#endif
+   else if constexpr (std::is_integral_v && sizeof(T) == sizeof(signed 
char)
+&& std::is_signed_v)
+ x = static_cast(static_cast<__vector signed char>(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == sizeof(signed 
char))
+ x = static_cast(static_cast<__vector unsigned char>(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == sizeof(short)
+&& std::is_signed_v)
+ x = static_cast(static_cast<__vector signed short>(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == sizeof(short))
+ x = static_cast(static_cast<__vector unsigned short>(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == sizeof(int)
+&& std::is_signed_v)
+ x = static_cast(static_cast<__vector signed int>(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == sizeof(int))
+ x = static_cast(static_cast<__vector unsigned int>(x));
+#ifdef __VSX__
+   else if constexpr (std::is_integral_v && sizeof(T) == sizeof(long 
long)
+&& std::is_signed_v)
+ x = static_cast(static_cast<__vector signed long long>(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == sizeof(long 
long))
+ x = static_cast(static_cast<__vector unsigned long long>(x));
+#endif
+#elif __ARM_NEON
+   if constexpr (std::is_same_v)
+ x = static_cast(static_cast(x));
+#ifdef __aarch64__
+   else if constexpr (std::is_same_v)
+ x = static_cast(static_cast(x));
+#endif
+   else if constexpr (std::is_integral_v && sizeof(T) == 1 && 
std::is_signed_v)
+ x = static_cast(static_cast(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == 1)
+ x = static_cast(static_cast(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == 2 && 
std::is_signed_v)
+ x = static_cast(static_cast(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == 2)
+ x = static_cast(static_cast(x));
+   else if constexpr (std::is_integral_v && sizeof(T) == 4 && 
std::is_signed_v)
+ x

[gcc r11-11426] libstdc++: Protect against macros

2024-05-10 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:fdd2a21a53facecc8e2b240673f909dae4034e88

commit r11-11426-gfdd2a21a53facecc8e2b240673f909dae4034e88
Author: Matthias Kretz 
Date:   Fri Jun 2 21:21:36 2023 +0200

libstdc++: Protect against macros

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd.h (__bit_cast): Use
__gnu__::__vector_size__ instead of gnu::vector_size.

(cherry picked from commit ce2188e4320cbb46d6246bd3f478ba20440c62f3)

Diff:
---
 libstdc++-v3/include/experimental/bits/simd.h | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd.h 
b/libstdc++-v3/include/experimental/bits/simd.h
index d5683bfcacb3..248237731268 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -1615,18 +1615,18 @@ template 
   return reinterpret_cast<_To>(__x);
 else if constexpr (__is_vector_type_v<_To> && __from_is_vectorizable)
   {
-   using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
+   using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
return reinterpret_cast<_To>(_FV{__x});
   }
 else if constexpr (__to_is_vectorizable && __from_is_vectorizable)
   {
-   using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
-   using _FV [[gnu::vector_size(sizeof(_From))]] = _From;
+   using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
+   using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
return reinterpret_cast<_TV>(_FV{__x})[0];
   }
 else if constexpr (__to_is_vectorizable && __is_vector_type_v<_From>)
   {
-   using _TV [[gnu::vector_size(sizeof(_To))]] = _To;
+   using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
return reinterpret_cast<_TV>(__x)[0];
   }
 else


[gcc r11-11429] libstdc++: Fix call signature of builtins from masked ++/--

2024-05-10 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:f0b7583dab56c78824344e00eb6230eaaeb09a44

commit r11-11429-gf0b7583dab56c78824344e00eb6230eaaeb09a44
Author: Matthias Kretz 
Date:   Wed Mar 27 08:49:43 2024 +0100

libstdc++: Fix call signature of builtins from masked ++/--

This resolves failures in the "expensive" where-* test of check-simd
when targeting AVX-512.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_x86.h (_S_masked_unary): Call
the 4- and 8-byte variants of __builtin_ia32_subp[ds] without
rounding direction argument.

(cherry picked from commit 0ac2c0f0687b321ab54de271d788b4e0a287b4e2)

Diff:
---
 libstdc++-v3/include/experimental/bits/simd_x86.h | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h 
b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 2e7ea238811c..fb0d6977832a 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -3537,17 +3537,23 @@ template 
  }
else
  {
-#define _GLIBCXX_SIMD_MASK_SUB(_Sizeof, _Width, _Instr)
\
+#define _GLIBCXX_SIMD_MASK_SUB_512(_Sizeof, _Width, _Instr)
\
   if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width)   
\
 return __builtin_ia32_##_Instr##_mask( 
\
 __v._M_data, __vector_broadcast<_Np>(_Tp(__pm_one)), __v._M_data, \
 __k._M_data, _MM_FROUND_CUR_DIRECTION)
-   _GLIBCXX_SIMD_MASK_SUB(4, 64, subps512);
+#define _GLIBCXX_SIMD_MASK_SUB(_Sizeof, _Width, _Instr)
\
+  if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width)   
\
+return __builtin_ia32_##_Instr##_mask( 
\
+__v._M_data, __vector_broadcast<_Np>(_Tp(__pm_one)), __v._M_data, \
+__k._M_data)
+   _GLIBCXX_SIMD_MASK_SUB_512(4, 64, subps512);
_GLIBCXX_SIMD_MASK_SUB(4, 32, subps256);
_GLIBCXX_SIMD_MASK_SUB(4, 16, subps128);
-   _GLIBCXX_SIMD_MASK_SUB(8, 64, subpd512);
+   _GLIBCXX_SIMD_MASK_SUB_512(8, 64, subpd512);
_GLIBCXX_SIMD_MASK_SUB(8, 32, subpd256);
_GLIBCXX_SIMD_MASK_SUB(8, 16, subpd128);
+#undef _GLIBCXX_SIMD_MASK_SUB_512
 #undef _GLIBCXX_SIMD_MASK_SUB
  }
 #endif // __clang__


[gcc r11-11430] libstdc++: Add masked ++/-- implementation for sizeof < 16

2024-05-10 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:dcd598817c1b3c62d782f568ce63e37566228a58

commit r11-11430-gdcd598817c1b3c62d782f568ce63e37566228a58
Author: Matthias Kretz 
Date:   Wed Mar 27 13:41:25 2024 +0100

libstdc++: Add masked ++/-- implementation for sizeof < 16

This resolves further failures (-Wreturn-type warnings) and test
failures for where-* tests targeting AVX-512.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_x86.h (_S_masked_unary):
Cast inputs < 16 bytes to 16 byte vectors before calling the
right subtraction builtin. Before returning, truncate to the
return vector type.

(cherry picked from commit a6c630c314b099f64d79055964d88b257459cf13)

Diff:
---
 libstdc++-v3/include/experimental/bits/simd_x86.h | 24 +--
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h 
b/libstdc++-v3/include/experimental/bits/simd_x86.h
index fb0d6977832a..6883ab83f962 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -3507,6 +3507,9 @@ template 
 #ifdef __clang__
return __movm<_Np, _Tp>(__k._M_data) ? __v._M_data - __pm_one : 
__v._M_data;
 #else // __clang__
+   using _TV = __vector_type_t<_Tp, _Np>;
+   constexpr size_t __bytes = sizeof(__v) < 16 ? 16 : sizeof(__v);
+   constexpr size_t __width = __bytes / sizeof(_Tp);
if constexpr (is_integral_v<_Tp>)
  {
constexpr bool __lp64 = sizeof(long) == sizeof(long long);
@@ -3516,11 +3519,11 @@ template 
  std::conditional_t<__lp64, long long, int>,
  std::conditional_t<
std::is_same_v<_Ip, signed char>, char, _Ip>>;
-   const auto __value = __vector_bitcast<_Up>(__v._M_data);
+   const auto __value = __intrin_bitcast<__vector_type_t<_Up, 
__width>>(__v._M_data);
 #define _GLIBCXX_SIMD_MASK_SUB(_Sizeof, _Width, _Instr)
\
-  if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width)   
\
-return __vector_bitcast<_Tp>(__builtin_ia32_##_Instr##_mask(__value,   
\
-__vector_broadcast<_Np>(_Up(__pm_one)), __value, __k._M_data))
+  if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__value) == _Width)   
\
+return __intrin_bitcast<_TV>(__builtin_ia32_##_Instr##_mask(__value,   
\
+__vector_broadcast<__width>(_Up(__pm_one)), __value, __k._M_data))
_GLIBCXX_SIMD_MASK_SUB(1, 64, psubb512);
_GLIBCXX_SIMD_MASK_SUB(1, 32, psubb256);
_GLIBCXX_SIMD_MASK_SUB(1, 16, psubb128);
@@ -3537,16 +3540,17 @@ template 
  }
else
  {
+   const auto __value = __intrin_bitcast<__vector_type_t<_Tp, 
__width>>(__v._M_data);
 #define _GLIBCXX_SIMD_MASK_SUB_512(_Sizeof, _Width, _Instr)
\
-  if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width)   
\
+  if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__value) == _Width)   
\
 return __builtin_ia32_##_Instr##_mask( 
\
-__v._M_data, __vector_broadcast<_Np>(_Tp(__pm_one)), __v._M_data, \
+__value, __vector_broadcast<__width>(_Tp(__pm_one)), __value, \
 __k._M_data, _MM_FROUND_CUR_DIRECTION)
 #define _GLIBCXX_SIMD_MASK_SUB(_Sizeof, _Width, _Instr)
\
-  if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width)   
\
-return __builtin_ia32_##_Instr##_mask( 
\
-__v._M_data, __vector_broadcast<_Np>(_Tp(__pm_one)), __v._M_data, \
-__k._M_data)
+  if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__value) == _Width)   
\
+return __intrin_bitcast<_TV>(__builtin_ia32_##_Instr##_mask(   
\
+__value, __vector_broadcast<__width>(_Tp(__pm_one)), __value, \
+__k._M_data))
_GLIBCXX_SIMD_MASK_SUB_512(4, 64, subps512);
_GLIBCXX_SIMD_MASK_SUB(4, 32, subps256);
_GLIBCXX_SIMD_MASK_SUB(4, 16, subps128);


[gcc r11-11433] libstdc++: Silence irrelevant warnings in <experimental/simd>

2024-05-10 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:6679ea120ee06355ba309c53604b23909d325b98

commit r11-11433-g6679ea120ee06355ba309c53604b23909d325b98
Author: Matthias Kretz 
Date:   Wed Apr 17 10:35:47 2024 +0200

libstdc++: Silence irrelevant warnings in <experimental/simd>

Avoid
-Wnarrowing in C code;
-Wtautological-compare in unconditional static_assert (necessary for
faking a dependency on a template parameter)

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd.h: Ignore -Wnarrowing for
arm_neon.h.
(__int_for_sizeof): Replace tautological compare with checking
for invalid template parameter value.
* include/experimental/bits/simd_builtin.h (__extract_part):
Remove tautological compare by combining two static_assert.

(cherry picked from commit e7a3ad29c9c832b6ae999cbfb0af89e121959030)

Diff:
---
 libstdc++-v3/include/experimental/bits/simd.h | 8 +++-
 libstdc++-v3/include/experimental/bits/simd_builtin.h | 3 +--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd.h 
b/libstdc++-v3/include/experimental/bits/simd.h
index 248237731268..56ec9dd4a84d 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -42,7 +42,12 @@
 #if _GLIBCXX_SIMD_X86INTRIN
 #include 
 #elif _GLIBCXX_SIMD_HAVE_NEON
+#pragma GCC diagnostic push
+// narrowing conversion of '__a' from 'uint64_t' {aka 'long long unsigned 
int'} to
+//   'int64x1_t' {aka 'long long int'} [-Wnarrowing]
+#pragma GCC diagnostic ignored "-Wnarrowing"
 #include 
+#pragma GCC diagnostic pop
 #endif
 
 /** @ingroup ts_simd
@@ -385,6 +390,7 @@ template 
   constexpr auto
   __int_for_sizeof()
   {
+static_assert(_Bytes > 0);
 if constexpr (_Bytes == sizeof(int))
   return int();
   #ifdef __clang__
@@ -450,7 +456,7 @@ template 
return _Ip{};
   }
 else
-  static_assert(_Bytes != _Bytes, "this should be unreachable");
+  static_assert(_Bytes == 0, "this should be unreachable");
   }
 #pragma GCC diagnostic pop
 
diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h 
b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index 9703012a6d95..42e894ac3b0b 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -276,8 +276,7 @@ template 
   __extract_part(const _SimdWrapper __x)
   {
 static_assert(_Combine == 1, "_Combine != 1 not implemented");
-static_assert(__have_avx512f && _Np == _Np);
-static_assert(_Total >= 2 && _Index + _Combine <= _Total && _Index >= 0);
+static_assert(__have_avx512f && _Total >= 2 && _Index + _Combine <= _Total 
&& _Index >= 0);
 return __x._M_data >> (_Index * _Np / _Total);
   }


[gcc r14-9689] libstdc++: Fix call signature of builtins from masked ++/--

2024-03-27 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:0ac2c0f0687b321ab54de271d788b4e0a287b4e2

commit r14-9689-g0ac2c0f0687b321ab54de271d788b4e0a287b4e2
Author: Matthias Kretz 
Date:   Wed Mar 27 08:49:43 2024 +0100

libstdc++: Fix call signature of builtins from masked ++/--

This resolves failures in the "expensive" where-* test of check-simd
when targeting AVX-512.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd_x86.h (_S_masked_unary): Call
the 4- and 8-byte variants of __builtin_ia32_subp[ds] without
rounding direction argument.

Diff:
---
 libstdc++-v3/include/experimental/bits/simd_x86.h | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h 
b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 16b207be2a3..6b414486fee 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -3538,17 +3538,23 @@ template 
  }
else
  {
-#define _GLIBCXX_SIMD_MASK_SUB(_Sizeof, _Width, _Instr)
\
+#define _GLIBCXX_SIMD_MASK_SUB_512(_Sizeof, _Width, _Instr)
\
   if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width)   
\
 return __builtin_ia32_##_Instr##_mask( 
\
 __v._M_data, __vector_broadcast<_Np>(_Tp(__pm_one)), __v._M_data, \
 __k._M_data, _MM_FROUND_CUR_DIRECTION)
-   _GLIBCXX_SIMD_MASK_SUB(4, 64, subps512);
+#define _GLIBCXX_SIMD_MASK_SUB(_Sizeof, _Width, _Instr)
\
+  if constexpr (sizeof(_Tp) == _Sizeof && sizeof(__v) == _Width)   
\
+return __builtin_ia32_##_Instr##_mask( 
\
+__v._M_data, __vector_broadcast<_Np>(_Tp(__pm_one)), __v._M_data, \
+__k._M_data)
+   _GLIBCXX_SIMD_MASK_SUB_512(4, 64, subps512);
_GLIBCXX_SIMD_MASK_SUB(4, 32, subps256);
_GLIBCXX_SIMD_MASK_SUB(4, 16, subps128);
-   _GLIBCXX_SIMD_MASK_SUB(8, 64, subpd512);
+   _GLIBCXX_SIMD_MASK_SUB_512(8, 64, subpd512);
_GLIBCXX_SIMD_MASK_SUB(8, 32, subpd256);
_GLIBCXX_SIMD_MASK_SUB(8, 16, subpd128);
+#undef _GLIBCXX_SIMD_MASK_SUB_512
 #undef _GLIBCXX_SIMD_MASK_SUB
  }
 #endif // __clang__


[gcc r14-10002] libstdc++: Add include guard to simd-internal header

2024-04-17 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:3cfe94ad28102618c14a91c0a83d9e5cc7df69d7

commit r14-10002-g3cfe94ad28102618c14a91c0a83d9e5cc7df69d7
Author: Matthias Kretz 
Date:   Wed Apr 17 10:12:42 2024 +0200

libstdc++: Add include guard to simd-internal header

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/numeric_traits.h: Add include guard.

Diff:
---
 libstdc++-v3/include/experimental/bits/numeric_traits.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/libstdc++-v3/include/experimental/bits/numeric_traits.h 
b/libstdc++-v3/include/experimental/bits/numeric_traits.h
index b195de7c3db..8717f684d96 100644
--- a/libstdc++-v3/include/experimental/bits/numeric_traits.h
+++ b/libstdc++-v3/include/experimental/bits/numeric_traits.h
@@ -22,6 +22,9 @@
 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 // .
 
+#ifndef _GLIBCXX_EXPERIMENTAL_BITS_NUMERIC_TRAITS_H
+#define _GLIBCXX_EXPERIMENTAL_BITS_NUMERIC_TRAITS_H
+
 #include 
 
 namespace std {
@@ -565,3 +568,4 @@ template <>
 #endif // __FINITE_MATH_ONLY__
 
 } // namespace std
+#endif  // _GLIBCXX_EXPERIMENTAL_BITS_NUMERIC_TRAITS_H


[gcc r14-10078] libstdc++: Silence irrelevant warnings in <experimental/simd>

2024-04-22 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:e7a3ad29c9c832b6ae999cbfb0af89e121959030

commit r14-10078-ge7a3ad29c9c832b6ae999cbfb0af89e121959030
Author: Matthias Kretz 
Date:   Wed Apr 17 10:35:47 2024 +0200

libstdc++: Silence irrelevant warnings in <experimental/simd>

Avoid
-Wnarrowing in C code;
-Wtautological-compare in unconditional static_assert (necessary for
faking a dependency on a template parameter)

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

* include/experimental/bits/simd.h: Ignore -Wnarrowing for
arm_neon.h.
(__int_for_sizeof): Replace tautological compare with checking
for invalid template parameter value.
* include/experimental/bits/simd_builtin.h (__extract_part):
Remove tautological compare by combining two static_assert.

Diff:
---
 libstdc++-v3/include/experimental/bits/simd.h | 8 +++-
 libstdc++-v3/include/experimental/bits/simd_builtin.h | 3 +--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd.h 
b/libstdc++-v3/include/experimental/bits/simd.h
index 03c2e17a326..6ef9c955cfa 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -44,7 +44,12 @@
 #if _GLIBCXX_SIMD_X86INTRIN
 #include 
 #elif _GLIBCXX_SIMD_HAVE_NEON
+#pragma GCC diagnostic push
+// narrowing conversion of '__a' from 'uint64_t' {aka 'long long unsigned 
int'} to
+//   'int64x1_t' {aka 'long long int'} [-Wnarrowing]
+#pragma GCC diagnostic ignored "-Wnarrowing"
 #include 
+#pragma GCC diagnostic pop
 #endif
 #if _GLIBCXX_SIMD_HAVE_SVE
 #include 
@@ -598,6 +603,7 @@ template 
   constexpr auto
   __int_for_sizeof()
   {
+static_assert(_Bytes > 0);
 if constexpr (_Bytes == sizeof(int))
   return int();
   #ifdef __clang__
@@ -663,7 +669,7 @@ template 
return _Ip{};
   }
 else
-  static_assert(_Bytes != _Bytes, "this should be unreachable");
+  static_assert(_Bytes == 0, "this should be unreachable");
   }
 #pragma GCC diagnostic pop
 
diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h 
b/libstdc++-v3/include/experimental/bits/simd_builtin.h
index af0c4886108..49c7c7e1c70 100644
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -278,8 +278,7 @@ template 
   __extract_part(const _SimdWrapper __x)
   {
 static_assert(_Combine == 1, "_Combine != 1 not implemented");
-static_assert(__have_avx512f && _Np == _Np);
-static_assert(_Total >= 2 && _Index + _Combine <= _Total && _Index >= 0);
+static_assert(__have_avx512f && _Total >= 2 && _Index + _Combine <= _Total 
&& _Index >= 0);
 return __x._M_data >> (_Index * _Np / _Total);
   }


[gcc r14-10296] libstdc++: Fix simd conversion for -fno-signed-char for Clang

2024-06-09 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:489b58b79782fa361c0d7e852e0e684d743c8399

commit r14-10296-g489b58b79782fa361c0d7e852e0e684d743c8399
Author: Matthias Kretz 
Date:   Mon Jun 3 12:02:07 2024 +0200

libstdc++: Fix simd conversion for -fno-signed-char for Clang

The special case for Clang in the trait producing a signed integer type
led to the trait returning 'char' where it should have been 'signed
char'. This workaround was introduced because on Clang the return type
of vector compares was not convertible to '_SimdWrapper<
__int_for_sizeof_t<...' unless '__int_for_sizeof_t<char>' was an alias
for 'char'. In order to not rewrite the complete mask type code (there
is code scattered around the implementation assuming signed integers),
this needs to be 'signed char'; so the special case for Clang needs to
be removed.
The conversion issue is now solved in _SimdWrapper, which now
additionally allows conversion from vector types with compatible
integral type.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/115308
* include/experimental/bits/simd.h (__int_for_sizeof): Remove
special cases for __clang__.
(_SimdWrapper): Change constructor overload set to allow
conversion from vector types with integral conversions via bit
reinterpretation.

(cherry picked from commit 8e36cf4c5c9140915d001db132a900b48037)

Diff:
---
 libstdc++-v3/include/experimental/bits/simd.h | 45 ---
 1 file changed, 27 insertions(+), 18 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd.h 
b/libstdc++-v3/include/experimental/bits/simd.h
index 7c524625719..cb1f13d8ba6 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -606,19 +606,12 @@ template 
 static_assert(_Bytes > 0);
 if constexpr (_Bytes == sizeof(int))
   return int();
-  #ifdef __clang__
-else if constexpr (_Bytes == sizeof(char))
-  return char();
-  #else
 else if constexpr (_Bytes == sizeof(_SChar))
   return _SChar();
-  #endif
 else if constexpr (_Bytes == sizeof(short))
   return short();
-  #ifndef __clang__
 else if constexpr (_Bytes == sizeof(long))
   return long();
-  #endif
 else if constexpr (_Bytes == sizeof(_LLong))
   return _LLong();
   #ifdef __SIZEOF_INT128__
@@ -2747,6 +2740,8 @@ template 
 
 // }}}
 // _SimdWrapper{{{
+struct _DisabledSimdWrapper;
+
 template 
   struct _SimdWrapper<
 _Tp, _Width,
@@ -2756,16 +2751,17 @@ template 
  == sizeof(__vector_type_t<_Tp, _Width>),
   __vector_type_t<_Tp, _Width>>
   {
-using _Base
-  = _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
-  && sizeof(_Tp) * _Width
-   == sizeof(__vector_type_t<_Tp, _Width>),
-__vector_type_t<_Tp, _Width>>;
+static constexpr bool _S_need_default_init
+  = __has_iec559_behavior<__signaling_NaN, _Tp>::value
+ and sizeof(_Tp) * _Width == sizeof(__vector_type_t<_Tp, _Width>);
+
+using _BuiltinType = __vector_type_t<_Tp, _Width>;
+
+using _Base = _SimdWrapperBase<_S_need_default_init, _BuiltinType>;
 
 static_assert(__is_vectorizable_v<_Tp>);
 static_assert(_Width >= 2); // 1 doesn't make sense, use _Tp directly then
 
-using _BuiltinType = __vector_type_t<_Tp, _Width>;
 using value_type = _Tp;
 
 static inline constexpr size_t _S_full_size
@@ -2801,13 +2797,26 @@ template 
 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
 operator=(_SimdWrapper&&) = default;
 
-template >,
-is_same<_V, __intrinsic_type_t<_Tp, _Width>
+// Convert from exactly matching __vector_type_t
+using _SimdWrapperBase<_S_need_default_init, 
_BuiltinType>::_SimdWrapperBase;
+
+// Convert from __intrinsic_type_t if __intrinsic_type_t and 
__vector_type_t differ, otherwise
+// this ctor should not exist. Making the argument type unusable is our 
next best solution.
+_GLIBCXX_SIMD_INTRINSIC constexpr
+_SimdWrapper(conditional_t>,
+  _DisabledSimdWrapper, __intrinsic_type_t<_Tp, 
_Width>> __x)
+: _Base(__vector_bitcast<_Tp, _Width>(__x)) {}
+
+// Convert from different __vector_type_t, but only if bit 
reinterpretation is a correct
+// conversion of the value_type
+template ,
+ typename = enable_if_t
+  and is_integral_v>>
   _GLIBCXX_SIMD_INTRINSIC constexpr
   _SimdWrapper(_V __x)
-  // __vector_bitcast can convert e.g. __m128 to __vector(2) float
-  : _Base(__vector_bitcast<_Tp, _Width>(__x)) {}
+  : _Base(reinterpret_cast<_BuiltinType>(__x)) {}
 
 template  && ...)


[gcc r13-8840] libstdc++: Avoid MMX return types from __builtin_shufflevector

2024-06-11 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:0efc27068e59cac6bd80ff962e92618a037bbfe8

commit r13-8840-g0efc27068e59cac6bd80ff962e92618a037bbfe8
Author: Matthias Kretz 
Date:   Wed May 15 11:02:22 2024 +0200

libstdc++: Avoid MMX return types from __builtin_shufflevector

This resolves a regression on i686 that was introduced with
r15-429-gfb1649f8b4ad50.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/115247
* include/experimental/bits/simd.h (__as_vector): Don't use
vector_size(8) on __i386__.
(__vec_shuffle): Never return MMX vectors, widen to 16 bytes
instead.
(concat): Fix padding calculation to pick up widening logic from
__as_vector.

(cherry picked from commit 241a6cc88d866fb36bd35ddb3edb659453d6322e)

Diff:
---
 libstdc++-v3/include/experimental/bits/simd.h | 39 +++
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd.h 
b/libstdc++-v3/include/experimental/bits/simd.h
index 6d3209b1a0e..ce7805e43e5 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -1630,7 +1630,12 @@ template 
  {
static_assert(is_simd<_V>::value);
using _Tp = typename _V::value_type;
+#ifdef __i386__
+   constexpr auto __bytes = sizeof(_Tp) == 8 ? 16 : sizeof(_Tp);
+   using _RV [[__gnu__::__vector_size__(__bytes)]] = _Tp;
+#else
using _RV [[__gnu__::__vector_size__(sizeof(_Tp))]] = _Tp;
+#endif
return _RV{__data(__x)};
  }
   }
@@ -2046,11 +2051,14 @@ template >
 // }}}
 // __vec_shuffle{{{
 template 
-  _GLIBCXX_SIMD_INTRINSIC constexpr auto
+  _GLIBCXX_SIMD_INTRINSIC constexpr
+  __vector_type_t()[0])>, 
sizeof...(_Is)>
   __vec_shuffle(_T0 __x, _T1 __y, index_sequence<_Is...> __seq, _Fun 
__idx_perm)
   {
 constexpr int _N0 = sizeof(__x) / sizeof(__x[0]);
 constexpr int _N1 = sizeof(__y) / sizeof(__y[0]);
+using _Tp = remove_reference_t()[0])>;
+using _RV [[maybe_unused]] = __vector_type_t<_Tp, sizeof...(_Is)>;
 #if __has_builtin(__builtin_shufflevector)
 #ifdef __clang__
 // Clang requires _T0 == _T1
@@ -2070,14 +2078,23 @@ template 
 });
 else
 #endif
-  return __builtin_shufflevector(__x, __y, [=] {
-  constexpr int __j = __idx_perm(_Is);
-  static_assert(__j < _N0 + _N1);
-  return __j;
-}()...);
+  {
+   const auto __r = __builtin_shufflevector(__x, __y, [=] {
+  constexpr int __j = __idx_perm(_Is);
+  static_assert(__j < _N0 + _N1);
+  return __j;
+}()...);
+#ifdef __i386__
+   if constexpr (sizeof(__r) == sizeof(_RV))
+ return __r;
+   else
+ return _RV {__r[_Is]...};
+#else
+   return __r;
+#endif
+  }
 #else
-using _Tp = __remove_cvref_t;
-return __vector_type_t<_Tp, sizeof...(_Is)> {
+return _RV {
   [=]() -> _Tp {
constexpr int __j = __idx_perm(_Is);
static_assert(__j < _N0 + _N1);
@@ -4312,9 +4329,9 @@ template 
__vec_shuffle(__as_vector(__xs)..., 
std::make_index_sequence<_RW::_S_full_size>(),
  [](int __i) {
constexpr int __sizes[2] = 
{int(simd_size_v<_Tp, _As>)...};
-   constexpr int __padding0
- = sizeof(__vector_type_t<_Tp, __sizes[0]>) / 
sizeof(_Tp)
- - __sizes[0];
+   constexpr int __vsizes[2]
+ = {int(sizeof(__as_vector(__xs)) / 
sizeof(_Tp))...};
+   constexpr int __padding0 = __vsizes[0] - 
__sizes[0];
return __i >= _Np ? -1 : __i < __sizes[0] ? __i 
: __i + __padding0;
  })};
   }


[gcc r15-1500] libstdc++: Fix find_last_set(simd_mask) to ignore padding bits

2024-06-20 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:1340ddea0158de3f49aeb75b4013e5fc313ff6f4

commit r15-1500-g1340ddea0158de3f49aeb75b4013e5fc313ff6f4
Author: Matthias Kretz 
Date:   Fri Jun 14 15:11:25 2024 +0200

libstdc++: Fix find_last_set(simd_mask) to ignore padding bits

With the change to the AVX512 find_last_set implementation, the change
to AVX512 operator!= is unnecessary. However, the latter was not
producing optimal code and unnecessarily set the padding bits. In
theory, the compiler could determine that with the new !=
implementation, the bit operation for clearing the padding bits is a
no-op and can be elided.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/115454
* include/experimental/bits/simd_x86.h (_S_not_equal_to): Use
neq comparison instead of bitwise negation after eq.
(_S_find_last_set): Clear unused high bits before computing
bit_width.
* testsuite/experimental/simd/pr115454_find_last_set.cc: New
test.

Diff:
---
 libstdc++-v3/include/experimental/bits/simd_x86.h  | 26 ++--
 .../experimental/simd/pr115454_find_last_set.cc| 49 ++
 2 files changed, 62 insertions(+), 13 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h 
b/libstdc++-v3/include/experimental/bits/simd_x86.h
index 4ab933b573c6..e498b1e4ee4d 100644
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -2339,29 +2339,29 @@ template 
  __assert_unreachable<_Tp>();
  }
else if constexpr (sizeof(__xi) == 64 && sizeof(_Tp) == 8)
- return ~_mm512_mask_cmpeq_epi64_mask(__k1, __xi, __yi);
+ return _mm512_mask_cmpneq_epi64_mask(__k1, __xi, __yi);
else if constexpr (sizeof(__xi) == 64 && sizeof(_Tp) == 4)
- return ~_mm512_mask_cmpeq_epi32_mask(__k1, __xi, __yi);
+ return _mm512_mask_cmpneq_epi32_mask(__k1, __xi, __yi);
else if constexpr (sizeof(__xi) == 64 && sizeof(_Tp) == 2)
- return ~_mm512_mask_cmpeq_epi16_mask(__k1, __xi, __yi);
+ return _mm512_mask_cmpneq_epi16_mask(__k1, __xi, __yi);
else if constexpr (sizeof(__xi) == 64 && sizeof(_Tp) == 1)
- return ~_mm512_mask_cmpeq_epi8_mask(__k1, __xi, __yi);
+ return _mm512_mask_cmpneq_epi8_mask(__k1, __xi, __yi);
else if constexpr (sizeof(__xi) == 32 && sizeof(_Tp) == 8)
- return ~_mm256_mask_cmpeq_epi64_mask(__k1, __xi, __yi);
+ return _mm256_mask_cmpneq_epi64_mask(__k1, __xi, __yi);
else if constexpr (sizeof(__xi) == 32 && sizeof(_Tp) == 4)
- return ~_mm256_mask_cmpeq_epi32_mask(__k1, __xi, __yi);
+ return _mm256_mask_cmpneq_epi32_mask(__k1, __xi, __yi);
else if constexpr (sizeof(__xi) == 32 && sizeof(_Tp) == 2)
- return ~_mm256_mask_cmpeq_epi16_mask(__k1, __xi, __yi);
+ return _mm256_mask_cmpneq_epi16_mask(__k1, __xi, __yi);
else if constexpr (sizeof(__xi) == 32 && sizeof(_Tp) == 1)
- return ~_mm256_mask_cmpeq_epi8_mask(__k1, __xi, __yi);
+ return _mm256_mask_cmpneq_epi8_mask(__k1, __xi, __yi);
else if constexpr (sizeof(__xi) == 16 && sizeof(_Tp) == 8)
- return ~_mm_mask_cmpeq_epi64_mask(__k1, __xi, __yi);
+ return _mm_mask_cmpneq_epi64_mask(__k1, __xi, __yi);
else if constexpr (sizeof(__xi) == 16 && sizeof(_Tp) == 4)
- return ~_mm_mask_cmpeq_epi32_mask(__k1, __xi, __yi);
+ return _mm_mask_cmpneq_epi32_mask(__k1, __xi, __yi);
else if constexpr (sizeof(__xi) == 16 && sizeof(_Tp) == 2)
- return ~_mm_mask_cmpeq_epi16_mask(__k1, __xi, __yi);
+ return _mm_mask_cmpneq_epi16_mask(__k1, __xi, __yi);
else if constexpr (sizeof(__xi) == 16 && sizeof(_Tp) == 1)
- return ~_mm_mask_cmpeq_epi8_mask(__k1, __xi, __yi);
+ return _mm_mask_cmpneq_epi8_mask(__k1, __xi, __yi);
else
  __assert_unreachable<_Tp>();
  }   // }}}
@@ -5292,7 +5292,7 @@ template 
   _S_find_last_set(simd_mask<_Tp, _Abi> __k)
   {
if constexpr (__is_avx512_abi<_Abi>())
- return std::__bit_width(__k._M_data._M_data) - 1;
+ return std::__bit_width(_Abi::_S_masked(__k._M_data)._M_data) - 1;
else
  return _Base::_S_find_last_set(__k);
   }
diff --git a/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc 
b/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc
new file mode 100644
index ..b47f19d30674
--- /dev/null
+++ b/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc
@@ -0,0 +1,49 @@
+// { dg-options "-std=gnu++17" }
+// { dg-do run { target *-*-* } }

[gcc r12-10570] libstdc++: Use __builtin_shufflevector for simd split and concat

2024-06-20 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:fb067547e401940b433cf0d2ae30749b4c21492e

commit r12-10570-gfb067547e401940b433cf0d2ae30749b4c21492e
Author: Matthias Kretz 
Date:   Mon May 6 12:13:55 2024 +0200

libstdc++: Use __builtin_shufflevector for simd split and concat

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/114958
* include/experimental/bits/simd.h (__as_vector): Return scalar
simd as one-element vector. Return vector from single-vector
fixed_size simd.
(__vec_shuffle): New.
(__extract_part): Adjust return type signature.
(split): Use __extract_part for any split into non-fixed_size
simds.
(concat): If the return type stores a single vector, use
__vec_shuffle (which calls __builtin_shufflevector) to produce
the return value.
* include/experimental/bits/simd_builtin.h
(__shift_elements_right): Removed.
(__extract_part): Return single elements directly. Use
__vec_shuffle (which calls __builtin_shufflevector) for all
non-trivial cases.
* include/experimental/bits/simd_fixed_size.h (__extract_part):
Return single elements directly.
* testsuite/experimental/simd/pr114958.cc: New test.

(cherry picked from commit fb1649f8b4ad5043dd0e65e4e3a643a0ced018a9)

Diff:
---
 libstdc++-v3/include/experimental/bits/simd.h  | 161 +++--
 .../include/experimental/bits/simd_builtin.h   | 152 +--
 .../include/experimental/bits/simd_fixed_size.h|   4 +-
 .../testsuite/experimental/simd/pr114958.cc|  20 +++
 4 files changed, 145 insertions(+), 192 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd.h 
b/libstdc++-v3/include/experimental/bits/simd.h
index 439545869be8..606062984405 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -1616,7 +1616,24 @@ template 
 if constexpr (__is_vector_type_v<_V>)
   return __x;
 else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
-  return __data(__x)._M_data;
+  {
+   if constexpr (__is_fixed_size_abi_v)
+ {
+   static_assert(is_simd<_V>::value);
+   static_assert(_V::abi_type::template __traits<
+   typename 
_V::value_type>::_SimdMember::_S_tuple_size == 1);
+   return __as_vector(__data(__x).first);
+ }
+   else if constexpr (_V::size() > 1)
+ return __data(__x)._M_data;
+   else
+ {
+   static_assert(is_simd<_V>::value);
+   using _Tp = typename _V::value_type;
+   using _RV [[__gnu__::__vector_size__(sizeof(_Tp))]] = _Tp;
+   return _RV{__data(__x)};
+ }
+  }
 else if constexpr (__is_vectorizable_v<_V>)
   return __vector_type_t<_V, 2>{__x};
 else
@@ -2026,6 +2043,60 @@ template >
   return ~__a;
   }
 
+// }}}
+// __vec_shuffle{{{
+template 
+  _GLIBCXX_SIMD_INTRINSIC constexpr auto
+  __vec_shuffle(_T0 __x, _T1 __y, index_sequence<_Is...> __seq, _Fun 
__idx_perm)
+  {
+constexpr int _N0 = sizeof(__x) / sizeof(__x[0]);
+constexpr int _N1 = sizeof(__y) / sizeof(__y[0]);
+#if __has_builtin(__builtin_shufflevector)
+#ifdef __clang__
+// Clang requires _T0 == _T1
+if constexpr (sizeof(__x) > sizeof(__y) and _N1 == 1)
+  return __vec_shuffle(__x, _T0{__y[0]}, __seq, __idx_perm);
+else if constexpr (sizeof(__x) > sizeof(__y))
+  return __vec_shuffle(__x, __intrin_bitcast<_T0>(__y), __seq, __idx_perm);
+else if constexpr (sizeof(__x) < sizeof(__y) and _N0 == 1)
+  return __vec_shuffle(_T1{__x[0]}, __y, __seq, [=](int __i) {
+  __i = __idx_perm(__i);
+  return __i < _N0 ? __i : __i - _N0 + _N1;
+});
+else if constexpr (sizeof(__x) < sizeof(__y))
+  return __vec_shuffle(__intrin_bitcast<_T1>(__x), __y, __seq, [=](int 
__i) {
+  __i = __idx_perm(__i);
+  return __i < _N0 ? __i : __i - _N0 + _N1;
+});
+else
+#endif
+  return __builtin_shufflevector(__x, __y, [=] {
+  constexpr int __j = __idx_perm(_Is);
+  static_assert(__j < _N0 + _N1);
+  return __j;
+}()...);
+#else
+using _Tp = __remove_cvref_t;
+return __vector_type_t<_Tp, sizeof...(_Is)> {
+  [=]() -> _Tp {
+   constexpr int __j = __idx_perm(_Is);
+   static_assert(__j < _N0 + _N1);
+   if constexpr (__j < 0)
+ return 0;
+   else if constexpr (__j < _N0)
+ return __x[__j];
+   else
+ return __y[__j - _N0];
+  }()...
+};
+#endif
+  }
+
+template 
+  _GLIBCXX_SIMD_INTRINSIC constexpr auto
+  __vec_shuffle(_T0 __x, _Seq __seq, _Fun __idx_perm)
+  { return __vec_shuffle(__x, _T0(), __seq, __idx_perm); }
+
 // }}}
 // __concat{{{
 

[gcc r12-10571] libstdc++: Avoid MMX return types from __builtin_shufflevector

2024-06-20 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:f79b273d4145961133ef8b0344469e77425629f6

commit r12-10571-gf79b273d4145961133ef8b0344469e77425629f6
Author: Matthias Kretz 
Date:   Wed May 15 11:02:22 2024 +0200

libstdc++: Avoid MMX return types from __builtin_shufflevector

This resolves a regression on i686 that was introduced with
r15-429-gfb1649f8b4ad50.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/115247
* include/experimental/bits/simd.h (__as_vector): Don't use
vector_size(8) on __i386__.
(__vec_shuffle): Never return MMX vectors, widen to 16 bytes
instead.
(concat): Fix padding calculation to pick up widening logic from
__as_vector.

(cherry picked from commit 241a6cc88d866fb36bd35ddb3edb659453d6322e)

Diff:
---
 libstdc++-v3/include/experimental/bits/simd.h | 39 +++
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd.h 
b/libstdc++-v3/include/experimental/bits/simd.h
index 606062984405..2cc280762cd1 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -1630,7 +1630,12 @@ template 
  {
static_assert(is_simd<_V>::value);
using _Tp = typename _V::value_type;
+#ifdef __i386__
+   constexpr auto __bytes = sizeof(_Tp) == 8 ? 16 : sizeof(_Tp);
+   using _RV [[__gnu__::__vector_size__(__bytes)]] = _Tp;
+#else
using _RV [[__gnu__::__vector_size__(sizeof(_Tp))]] = _Tp;
+#endif
return _RV{__data(__x)};
  }
   }
@@ -2046,11 +2051,14 @@ template >
 // }}}
 // __vec_shuffle{{{
 template 
-  _GLIBCXX_SIMD_INTRINSIC constexpr auto
+  _GLIBCXX_SIMD_INTRINSIC constexpr
+  __vector_type_t()[0])>, 
sizeof...(_Is)>
   __vec_shuffle(_T0 __x, _T1 __y, index_sequence<_Is...> __seq, _Fun 
__idx_perm)
   {
 constexpr int _N0 = sizeof(__x) / sizeof(__x[0]);
 constexpr int _N1 = sizeof(__y) / sizeof(__y[0]);
+using _Tp = remove_reference_t()[0])>;
+using _RV [[maybe_unused]] = __vector_type_t<_Tp, sizeof...(_Is)>;
 #if __has_builtin(__builtin_shufflevector)
 #ifdef __clang__
 // Clang requires _T0 == _T1
@@ -2070,14 +2078,23 @@ template 
 });
 else
 #endif
-  return __builtin_shufflevector(__x, __y, [=] {
-  constexpr int __j = __idx_perm(_Is);
-  static_assert(__j < _N0 + _N1);
-  return __j;
-}()...);
+  {
+   const auto __r = __builtin_shufflevector(__x, __y, [=] {
+  constexpr int __j = __idx_perm(_Is);
+  static_assert(__j < _N0 + _N1);
+  return __j;
+}()...);
+#ifdef __i386__
+   if constexpr (sizeof(__r) == sizeof(_RV))
+ return __r;
+   else
+ return _RV {__r[_Is]...};
+#else
+   return __r;
+#endif
+  }
 #else
-using _Tp = __remove_cvref_t;
-return __vector_type_t<_Tp, sizeof...(_Is)> {
+return _RV {
   [=]() -> _Tp {
constexpr int __j = __idx_perm(_Is);
static_assert(__j < _N0 + _N1);
@@ -4310,9 +4327,9 @@ template 
__vec_shuffle(__as_vector(__xs)..., 
std::make_index_sequence<_RW::_S_full_size>(),
  [](int __i) {
constexpr int __sizes[2] = 
{int(simd_size_v<_Tp, _As>)...};
-   constexpr int __padding0
- = sizeof(__vector_type_t<_Tp, __sizes[0]>) / 
sizeof(_Tp)
- - __sizes[0];
+   constexpr int __vsizes[2]
+ = {int(sizeof(__as_vector(__xs)) / 
sizeof(_Tp))...};
+   constexpr int __padding0 = __vsizes[0] - 
__sizes[0];
return __i >= _Np ? -1 : __i < __sizes[0] ? __i 
: __i + __padding0;
  })};
   }


[gcc r12-10572] libstdc++: Fix simd conversion for -fno-signed-char for Clang

2024-06-20 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:b9569e7a829d054336d2704ccff61eece5437baf

commit r12-10572-gb9569e7a829d054336d2704ccff61eece5437baf
Author: Matthias Kretz 
Date:   Mon Jun 3 12:02:07 2024 +0200

libstdc++: Fix simd conversion for -fno-signed-char for Clang

The special case for Clang in the trait producing a signed integer type
led to the trait returning 'char' where it should have been 'signed
char'. This workaround was introduced because on Clang the return type
of vector compares was not convertible to '_SimdWrapper<
__int_for_sizeof_t<...' unless '__int_for_sizeof_t<char>' was an alias
for 'char'. In order to not rewrite the complete mask type code (there
is code scattered around the implementation assuming signed integers),
this needs to be 'signed char'; so the special case for Clang needs to
be removed.
The conversion issue is now solved in _SimdWrapper, which now
additionally allows conversion from vector types with compatible
integral type.

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/115308
* include/experimental/bits/simd.h (__int_for_sizeof): Remove
special cases for __clang__.
(_SimdWrapper): Change constructor overload set to allow
conversion from vector types with integral conversions via bit
reinterpretation.

(cherry picked from commit 8e36cf4c5c9140915d001db132a900b48037)

Diff:
---
 libstdc++-v3/include/experimental/bits/simd.h | 45 ---
 1 file changed, 27 insertions(+), 18 deletions(-)

diff --git a/libstdc++-v3/include/experimental/bits/simd.h 
b/libstdc++-v3/include/experimental/bits/simd.h
index 2cc280762cd1..365f5158f0d9 100644
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -584,19 +584,12 @@ template 
 static_assert(_Bytes > 0);
 if constexpr (_Bytes == sizeof(int))
   return int();
-  #ifdef __clang__
-else if constexpr (_Bytes == sizeof(char))
-  return char();
-  #else
 else if constexpr (_Bytes == sizeof(_SChar))
   return _SChar();
-  #endif
 else if constexpr (_Bytes == sizeof(short))
   return short();
-  #ifndef __clang__
 else if constexpr (_Bytes == sizeof(long))
   return long();
-  #endif
 else if constexpr (_Bytes == sizeof(_LLong))
   return _LLong();
   #ifdef __SIZEOF_INT128__
@@ -2712,6 +2705,8 @@ template 
 
 // }}}
 // _SimdWrapper{{{
+struct _DisabledSimdWrapper;
+
 template 
   struct _SimdWrapper<
 _Tp, _Width,
@@ -2721,16 +2716,17 @@ template 
  == sizeof(__vector_type_t<_Tp, _Width>),
   __vector_type_t<_Tp, _Width>>
   {
-using _Base
-  = _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
-  && sizeof(_Tp) * _Width
-   == sizeof(__vector_type_t<_Tp, _Width>),
-__vector_type_t<_Tp, _Width>>;
+static constexpr bool _S_need_default_init
+  = __has_iec559_behavior<__signaling_NaN, _Tp>::value
+ and sizeof(_Tp) * _Width == sizeof(__vector_type_t<_Tp, _Width>);
+
+using _BuiltinType = __vector_type_t<_Tp, _Width>;
+
+using _Base = _SimdWrapperBase<_S_need_default_init, _BuiltinType>;
 
 static_assert(__is_vectorizable_v<_Tp>);
 static_assert(_Width >= 2); // 1 doesn't make sense, use _Tp directly then
 
-using _BuiltinType = __vector_type_t<_Tp, _Width>;
 using value_type = _Tp;
 
 static inline constexpr size_t _S_full_size
@@ -2766,13 +2762,26 @@ template 
 _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
 operator=(_SimdWrapper&&) = default;
 
-template >,
-is_same<_V, __intrinsic_type_t<_Tp, _Width>
+// Convert from exactly matching __vector_type_t
+using _SimdWrapperBase<_S_need_default_init, 
_BuiltinType>::_SimdWrapperBase;
+
+// Convert from __intrinsic_type_t if __intrinsic_type_t and 
__vector_type_t differ, otherwise
+// this ctor should not exist. Making the argument type unusable is our 
next best solution.
+_GLIBCXX_SIMD_INTRINSIC constexpr
+_SimdWrapper(conditional_t>,
+  _DisabledSimdWrapper, __intrinsic_type_t<_Tp, 
_Width>> __x)
+: _Base(__vector_bitcast<_Tp, _Width>(__x)) {}
+
+// Convert from different __vector_type_t, but only if bit 
reinterpretation is a correct
+// conversion of the value_type
+template ,
+ typename = enable_if_t
+  and is_integral_v>>
   _GLIBCXX_SIMD_INTRINSIC constexpr
   _SimdWrapper(_V __x)
-  // __vector_bitcast can convert e.g. __m128 to __vector(2) float
-  : _Base(__vector_bitcast<_Tp, _Width>(__x)) {}
+  : _Base(reinterpret_cast<_BuiltinType>(__x)) {}
 
 template  && ...)


[gcc r13-8862] libstdc++: Fix test on x86_64 and non-simd targets

2024-06-21 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:c335e34ff89ec9aec1ba874dc5cece9c2303c906

commit r13-8862-gc335e34ff89ec9aec1ba874dc5cece9c2303c906
Author: Matthias Kretz 
Date:   Fri Jun 21 16:22:22 2024 +0200

libstdc++: Fix test on x86_64 and non-simd targets

* Running a test compiled with AVX512 instructions requires
avx512f_runtime not just avx512f.

* The 'reduce2' test violated an invariant of fixed_size_simd_mask and
thus failed on all targets without 16-Byte vector builtins enabled (in
bits/simd.h).

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/115575
* testsuite/experimental/simd/pr115454_find_last_set.cc: Require
avx512f_runtime. Don't memcpy fixed_size masks.

(cherry picked from commit 77f321435b4ac37992c2ed6737ca0caa1dd50551)

Diff:
---
 libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc 
b/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc
index b47f19d3067..25a713b4e94 100644
--- a/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc
+++ b/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc
@@ -1,7 +1,7 @@
 // { dg-options "-std=gnu++17" }
 // { dg-do run { target *-*-* } }
 // { dg-require-effective-target c++17 }
-// { dg-additional-options "-march=x86-64-v4" { target avx512f } }
+// { dg-additional-options "-march=x86-64-v4" { target avx512f_runtime } }
 // { dg-require-cmath "" }
 
 #include 
@@ -25,7 +25,9 @@ int reduce2()
 {
   using M8 = typename V::mask_type;
   using M4 = typename V::mask_type;
-  if constexpr (sizeof(M8) == sizeof(M4))
+  if constexpr (sizeof(M8) == sizeof(M4)
+ && !std::is_same_v>)
+// fixed_size invariant: padding bits of masks are zero, the memcpy would 
violate that
 {
   M4 k;
   __builtin_memcpy(&__data(k), &__data(M8(true)), sizeof(M4));


[gcc r12-10575] libstdc++: Fix test on x86_64 and non-simd targets

2024-06-21 Thread Matthias Kretz via Gcc-cvs
https://gcc.gnu.org/g:169d4d1addaac7eef6cde4049aa8b4f3d81c28b0

commit r12-10575-g169d4d1addaac7eef6cde4049aa8b4f3d81c28b0
Author: Matthias Kretz 
Date:   Fri Jun 21 16:22:22 2024 +0200

libstdc++: Fix test on x86_64 and non-simd targets

* Running a test compiled with AVX512 instructions requires
avx512f_runtime not just avx512f.

* The 'reduce2' test violated an invariant of fixed_size_simd_mask and
thus failed on all targets without 16-Byte vector builtins enabled (in
bits/simd.h).

Signed-off-by: Matthias Kretz 

libstdc++-v3/ChangeLog:

PR libstdc++/115575
* testsuite/experimental/simd/pr115454_find_last_set.cc: Require
avx512f_runtime. Don't memcpy fixed_size masks.

(cherry picked from commit 77f321435b4ac37992c2ed6737ca0caa1dd50551)

Diff:
---
 libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc 
b/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc
index b47f19d3067..25a713b4e94 100644
--- a/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc
+++ b/libstdc++-v3/testsuite/experimental/simd/pr115454_find_last_set.cc
@@ -1,7 +1,7 @@
 // { dg-options "-std=gnu++17" }
 // { dg-do run { target *-*-* } }
 // { dg-require-effective-target c++17 }
-// { dg-additional-options "-march=x86-64-v4" { target avx512f } }
+// { dg-additional-options "-march=x86-64-v4" { target avx512f_runtime } }
 // { dg-require-cmath "" }
 
 #include 
@@ -25,7 +25,9 @@ int reduce2()
 {
   using M8 = typename V::mask_type;
   using M4 = typename V::mask_type;
-  if constexpr (sizeof(M8) == sizeof(M4))
+  if constexpr (sizeof(M8) == sizeof(M4)
+ && !std::is_same_v>)
+// fixed_size invariant: padding bits of masks are zero, the memcpy would 
violate that
 {
   M4 k;
   __builtin_memcpy(&__data(k), &__data(M8(true)), sizeof(M4));