Tested on x86_64-linux-gnu. OK for trunk? 

----- 8< ------

Previously, the x86-specific implementation was necessary for efficient
reductions. When I tried to turn it into a missed-optimization PR, I could
no longer reproduce any issue: the generic _S_static_permute
implementation now optimizes just as well (or better).

The traits_impl.cc change is an unrelated drive-by fix: the test checked
__STDCPP_BFLOAT16_T__ where __STDCPP_FLOAT16_T__ was intended.

libstdc++-v3/ChangeLog:

        * include/bits/simd_vec.h (_M_elements_shifted_to_front): Remove
        x86-specific implementation.
        * testsuite/std/simd/traits_impl.cc: Fix incorrect macro name.

Signed-off-by: Matthias Kretz <[email protected]>
---
 libstdc++-v3/include/bits/simd_vec.h          | 23 -------------------
 .../testsuite/std/simd/traits_impl.cc         |  2 +-
 2 files changed, 1 insertion(+), 24 deletions(-)

diff --git a/libstdc++-v3/include/bits/simd_vec.h b/libstdc++-v3/include/bits/simd_vec.h
index bf6616df227..54c1502b23b 100644
--- a/libstdc++-v3/include/bits/simd_vec.h
+++ b/libstdc++-v3/include/bits/simd_vec.h
@@ -449,29 +449,6 @@ _M_elements_shifted_to_front() const
          static_assert(_Shift < _S_size && -_Shift < _S_size);
          if constexpr (_Shift == 0)
            return *this;
-#ifdef __SSE2__
-         else if (!__is_const_known(*this))
-           {
-             if constexpr (sizeof(_M_data) == 16 && _Shift > 0)
-               return reinterpret_cast<_DataType>(
-                        __builtin_ia32_psrldqi128(__vec_bit_cast<long long>(_M_data),
-                                                  _Shift * sizeof(value_type) * 8));
-             else if constexpr (sizeof(_M_data) == 16 && _Shift < 0)
-               return reinterpret_cast<_DataType>(
-                        __builtin_ia32_pslldqi128(__vec_bit_cast<long long>(_M_data),
-                                                  -_Shift * sizeof(value_type) * 8));
-             else if constexpr (sizeof(_M_data) < 16)
-               {
-                 auto __x = reinterpret_cast<__vec_builtin_type_bytes<long long, 16>>(
-                              __vec_zero_pad_to_16(_M_data));
-                 if constexpr (_Shift > 0)
-                   __x = __builtin_ia32_psrldqi128(__x, _Shift * sizeof(value_type) * 8);
-                 else
-                   __x = __builtin_ia32_pslldqi128(__x, -_Shift * sizeof(value_type) * 8);
-                 return _VecOps<_DataType>::_S_extract(__vec_bit_cast<__canon_value_type>(__x));
-               }
-           }
-#endif
          return _S_static_permute(*this, [](int __i) consteval {
                   int __off = __i + _Shift;
                   return __off >= _S_size || __off < 0 ? zero_element : __off;
diff --git a/libstdc++-v3/testsuite/std/simd/traits_impl.cc b/libstdc++-v3/testsuite/std/simd/traits_impl.cc
index dde41c70aca..9f6e9f42b6e 100644
--- a/libstdc++-v3/testsuite/std/simd/traits_impl.cc
+++ b/libstdc++-v3/testsuite/std/simd/traits_impl.cc
@@ -49,7 +49,7 @@ namespace simd
       static_assert(__vectorizable<__integer_from<N>>);
     }
   template for (constexpr int N : {
-#ifdef __STDCPP_BFLOAT16_T__
+#ifdef __STDCPP_FLOAT16_T__
                                  2,
 #endif
                                  4, 8})
-- 
──────────────────────────────────────────────────────────────────────────
 Dr. Matthias Kretz                           https://mattkretz.github.io
 GSI Helmholtz Center for Heavy Ion Research               https://gsi.de
 std::simd
──────────────────────────────────────────────────────────────────────────




Reply via email to