[PATCH] D44659: [libcxx] Optimize -O0 performance for operators

Tim Shen via Phabricator via cfe-commits Mon, 19 Mar 2018 17:47:42 -0700

timshen created this revision.
timshen added a reviewer: mclow.lists.
Herald added subscribers: christof, sanjoy.
Herald added a reviewer: EricWF.


When the vector extension (__attribute__((vector_size(...)))) is available,
use its built-in operators instead of generating loops of scalar operations.


https://reviews.llvm.org/D44659

Files:
  libcxx/include/experimental/simd
  libcxx/test/std/experimental/simd/simd.elementwise/operators.pass.cpp

Index: libcxx/test/std/experimental/simd/simd.elementwise/operators.pass.cpp
===================================================================
--- libcxx/test/std/experimental/simd/simd.elementwise/operators.pass.cpp
+++ libcxx/test/std/experimental/simd/simd.elementwise/operators.pass.cpp
@@ -64,136 +64,138 @@
 
 using namespace std::experimental::parallelism_v2;
 
+template <class SimdType>
 void test_pure_operators() {
   {
-    native_simd<int> a(42), b(4);
+    SimdType a(42), b(4);
 
-    assert(all_of(~a == native_simd<int>(~42)));
+    assert(all_of(~a == SimdType(~42)));
     assert(all_of(+a == a));
-    assert(all_of(-a == native_simd<int>(-42)));
-    assert(all_of(a + b == native_simd<int>(42 + 4)));
-    assert(all_of(a - b == native_simd<int>(42 - 4)));
-    assert(all_of(a * b == native_simd<int>(42 * 4)));
-    assert(all_of(a / b == native_simd<int>(42 / 4)));
-    assert(all_of(a % b == native_simd<int>(42 % 4)));
-    assert(all_of((a & b) == native_simd<int>(42 & 4)));
-    assert(all_of((a | b) == native_simd<int>(42 | 4)));
-    assert(all_of((a ^ b) == native_simd<int>(42 ^ 4)));
-    assert(all_of((a << b) == native_simd<int>(42 << 4)));
-    assert(all_of((a >> b) == native_simd<int>(42 >> 4)));
-    assert(all_of((a << 4) == native_simd<int>(42 << 4)));
-    assert(all_of((a >> 4) == native_simd<int>(42 >> 4)));
+    assert(all_of(-a == SimdType(-42)));
+    assert(all_of(a + b == SimdType(42 + 4)));
+    assert(all_of(a - b == SimdType(42 - 4)));
+    assert(all_of(a * b == SimdType(42 * 4)));
+    assert(all_of(a / b == SimdType(42 / 4)));
+    assert(all_of(a % b == SimdType(42 % 4)));
+    assert(all_of((a & b) == SimdType(42 & 4)));
+    assert(all_of((a | b) == SimdType(42 | 4)));
+    assert(all_of((a ^ b) == SimdType(42 ^ 4)));
+    assert(all_of((a << b) == SimdType(42 << 4)));
+    assert(all_of((a >> b) == SimdType(42 >> 4)));
+    assert(all_of((a << 4) == SimdType(42 << 4)));
+    assert(all_of((a >> 4) == SimdType(42 >> 4)));
   }
   {
-    native_simd<int> a([](int i) { return 2 * i + 1; }),
-        b([](int i) { return i + 1; });
+    SimdType a([](int i) { return 2 * i + 1; }), b([](int i) { return i + 1; });
 
-    assert(all_of(~a == native_simd<int>([](int i) { return ~(2 * i + 1); })));
+    assert(all_of(~a == SimdType([](int i) { return ~(2 * i + 1); })));
     assert(all_of(+a == a));
-    assert(all_of(-a == native_simd<int>([](int i) { return -(2 * i + 1); })));
-    assert(all_of(a + b == native_simd<int>([](int i) { return 3 * i + 2; })));
-    assert(all_of(a - b == native_simd<int>([](int i) { return i; })));
-    assert(all_of(a * b == native_simd<int>(
-                               [](int i) { return (2 * i + 1) * (i + 1); })));
-    assert(all_of(a / b == native_simd<int>(
-                               [](int i) { return (2 * i + 1) / (i + 1); })));
-    assert(all_of(a % b == native_simd<int>(
-                               [](int i) { return (2 * i + 1) % (i + 1); })));
-    assert(all_of((a & b) == native_simd<int>(
-                                 [](int i) { return (2 * i + 1) & (i + 1); })));
-    assert(all_of((a | b) == native_simd<int>(
-                                 [](int i) { return (2 * i + 1) | (i + 1); })));
-    assert(all_of((a ^ b) == native_simd<int>(
-                                 [](int i) { return (2 * i + 1) ^ (i + 1); })));
+    assert(all_of(-a == SimdType([](int i) { return -(2 * i + 1); })));
+    assert(all_of(a + b == SimdType([](int i) { return 3 * i + 2; })));
+    assert(all_of(a - b == SimdType([](int i) { return i; })));
+    assert(
+        all_of(a * b == SimdType([](int i) { return (2 * i + 1) * (i + 1); })));
+    assert(
+        all_of(a / b == SimdType([](int i) { return (2 * i + 1) / (i + 1); })));
+    assert(
+        all_of(a % b == SimdType([](int i) { return (2 * i + 1) % (i + 1); })));
+    assert(all_of((a & b) ==
+                  SimdType([](int i) { return (2 * i + 1) & (i + 1); })));
+    assert(all_of((a | b) ==
+                  SimdType([](int i) { return (2 * i + 1) | (i + 1); })));
+    assert(all_of((a ^ b) ==
+                  SimdType([](int i) { return (2 * i + 1) ^ (i + 1); })));
   }
 }
 
+template <class SimdType>
 void test_mutating_opreators() {
-  native_simd<int> b(4);
+  SimdType b(4);
   {
-    native_simd<int> a(42);
-    assert(all_of(++a == native_simd<int>(43)));
-    assert(all_of(a == native_simd<int>(43)));
+    SimdType a(42);
+    assert(all_of(++a == SimdType(43)));
+    assert(all_of(a == SimdType(43)));
   }
   {
-    native_simd<int> a(42);
-    assert(all_of(a++ == native_simd<int>(42)));
-    assert(all_of(a == native_simd<int>(43)));
+    SimdType a(42);
+    assert(all_of(a++ == SimdType(42)));
+    assert(all_of(a == SimdType(43)));
   }
   {
-    native_simd<int> a(42);
-    assert(all_of(--a == native_simd<int>(41)));
-    assert(all_of(a == native_simd<int>(41)));
+    SimdType a(42);
+    assert(all_of(--a == SimdType(41)));
+    assert(all_of(a == SimdType(41)));
   }
   {
-    native_simd<int> a(42);
-    assert(all_of(a-- == native_simd<int>(42)));
-    assert(all_of(a == native_simd<int>(41)));
+    SimdType a(42);
+    assert(all_of(a-- == SimdType(42)));
+    assert(all_of(a == SimdType(41)));
   }
   {
-    native_simd<int> a(42);
-    assert(all_of((a += b) == native_simd<int>(42 + 4)));
-    assert(all_of(a == native_simd<int>(42 + 4)));
+    SimdType a(42);
+    assert(all_of((a += b) == SimdType(42 + 4)));
+    assert(all_of(a == SimdType(42 + 4)));
   }
   {
-    native_simd<int> a(42);
-    assert(all_of((a -= b) == native_simd<int>(42 - 4)));
-    assert(all_of(a == native_simd<int>(42 - 4)));
+    SimdType a(42);
+    assert(all_of((a -= b) == SimdType(42 - 4)));
+    assert(all_of(a == SimdType(42 - 4)));
   }
   {
-    native_simd<int> a(42);
-    assert(all_of((a *= b) == native_simd<int>(42 * 4)));
-    assert(all_of(a == native_simd<int>(42 * 4)));
+    SimdType a(42);
+    assert(all_of((a *= b) == SimdType(42 * 4)));
+    assert(all_of(a == SimdType(42 * 4)));
   }
   {
-    native_simd<int> a(42);
-    assert(all_of((a /= b) == native_simd<int>(42 / 4)));
-    assert(all_of(a == native_simd<int>(42 / 4)));
+    SimdType a(42);
+    assert(all_of((a /= b) == SimdType(42 / 4)));
+    assert(all_of(a == SimdType(42 / 4)));
   }
   {
-    native_simd<int> a(42);
-    assert(all_of((a %= b) == native_simd<int>(42 % 4)));
-    assert(all_of(a == native_simd<int>(42 % 4)));
+    SimdType a(42);
+    assert(all_of((a %= b) == SimdType(42 % 4)));
+    assert(all_of(a == SimdType(42 % 4)));
   }
   {
-    native_simd<int> a(42);
-    assert(all_of((a &= b) == native_simd<int>(42 & 4)));
-    assert(all_of(a == native_simd<int>(42 & 4)));
+    SimdType a(42);
+    assert(all_of((a &= b) == SimdType(42 & 4)));
+    assert(all_of(a == SimdType(42 & 4)));
   }
   {
-    native_simd<int> a(42);
-    assert(all_of((a |= b) == native_simd<int>(42 | 4)));
-    assert(all_of(a == native_simd<int>(42 | 4)));
+    SimdType a(42);
+    assert(all_of((a |= b) == SimdType(42 | 4)));
+    assert(all_of(a == SimdType(42 | 4)));
   }
   {
-    native_simd<int> a(42);
-    assert(all_of((a ^= b) == native_simd<int>(42 ^ 4)));
-    assert(all_of(a == native_simd<int>(42 ^ 4)));
+    SimdType a(42);
+    assert(all_of((a ^= b) == SimdType(42 ^ 4)));
+    assert(all_of(a == SimdType(42 ^ 4)));
   }
   {
-    native_simd<int> a(42);
-    assert(all_of((a <<= b) == native_simd<int>(42 << 4)));
-    assert(all_of(a == native_simd<int>(42 << 4)));
+    SimdType a(42);
+    assert(all_of((a <<= b) == SimdType(42 << 4)));
+    assert(all_of(a == SimdType(42 << 4)));
   }
   {
-    native_simd<int> a(42);
-    assert(all_of((a >>= b) == native_simd<int>(42 >> 4)));
-    assert(all_of(a == native_simd<int>(42 >> 4)));
+    SimdType a(42);
+    assert(all_of((a >>= b) == SimdType(42 >> 4)));
+    assert(all_of(a == SimdType(42 >> 4)));
   }
   {
-    native_simd<int> a(42);
-    assert(all_of((a <<= 4) == native_simd<int>(42 << 4)));
-    assert(all_of(a == native_simd<int>(42 << 4)));
+    SimdType a(42);
+    assert(all_of((a <<= 4) == SimdType(42 << 4)));
+    assert(all_of(a == SimdType(42 << 4)));
   }
   {
-    native_simd<int> a(42);
-    assert(all_of((a >>= 4) == native_simd<int>(42 >> 4)));
-    assert(all_of(a == native_simd<int>(42 >> 4)));
+    SimdType a(42);
+    assert(all_of((a >>= 4) == SimdType(42 >> 4)));
+    assert(all_of(a == SimdType(42 >> 4)));
   }
 }
 
+template <class SimdType>
 void test_relational_operators() {
-  fixed_size_simd<int, 9> a, b;
+  SimdType a, b;
   {
     int buf[] = {1, 1, 1, 2, 2, 2, 3, 3, 3};
     a.copy_from(buf, element_aligned_tag());
@@ -208,58 +210,62 @@
       false, true, false,
       false, false, true,
     };
-    assert(all_of((a == b) == fixed_size_simd_mask<int, 9>(
+    assert(all_of((a == b) == typename SimdType::mask_type(
                                   expected, element_aligned_tag())));
   }
   {
     bool expected[] = {
       false, true, true,
       true, false, true,
       true, true, false,
     };
-    assert(all_of((a != b) == fixed_size_simd_mask<int, 9>(
+    assert(all_of((a != b) == typename SimdType::mask_type(
                                   expected, element_aligned_tag())));
   }
   {
     bool expected[] = {
       false, true, true,
       false, false, true,
       false, false, false,
     };
-    assert(all_of((a < b) == fixed_size_simd_mask<int, 9>(
+    assert(all_of((a < b) == typename SimdType::mask_type(
                                  expected, element_aligned_tag())));
   }
   {
     bool expected[] = {
       true, true, true,
       false, true, true,
       false, false, true,
     };
-    assert(all_of((a <= b) == fixed_size_simd_mask<int, 9>(
+    assert(all_of((a <= b) == typename SimdType::mask_type(
                                   expected, element_aligned_tag())));
   }
   {
     bool expected[] = {
       false, false, false,
       true, false, false,
       true, true, false,
     };
-    assert(all_of((a > b) == fixed_size_simd_mask<int, 9>(
+    assert(all_of((a > b) == typename SimdType::mask_type(
                                  expected, element_aligned_tag())));
   }
   {
     bool expected[] = {
       true, false, false,
       true, true, false,
       true, true, true,
     };
-    assert(all_of((a >= b) == fixed_size_simd_mask<int, 9>(
+    assert(all_of((a >= b) == typename SimdType::mask_type(
                                   expected, element_aligned_tag())));
   }
 }
 
 int main() {
-  test_pure_operators();
-  test_mutating_opreators();
-  test_relational_operators();
+  test_pure_operators<native_simd<int>>();
+  test_pure_operators<fixed_size_simd<int, 4>>();
+  test_mutating_opreators<native_simd<int>>();
+  test_mutating_opreators<fixed_size_simd<int, 4>>();
+  test_relational_operators<
+      simd<int, rebind_abi_t<int, 9, simd_abi::native<int>>>>();
+  test_relational_operators<fixed_size_simd<int, 9>>();
 }
Index: libcxx/include/experimental/simd
===================================================================
--- libcxx/include/experimental/simd
+++ libcxx/include/experimental/simd
@@ -622,11 +622,166 @@
 template <_StorageKind __kind, int _Np>
 struct __simd_abi {};
 
+template <class _Derived>
+struct __simd_storage_base {
+  static _Derived __neg(const _Derived& __a) {
+    _Derived __v;
+    for (size_t __i = 0; __i < _Derived::__size(); __i++) {
+      __v.__set(__i, -__a.__get(__i));
+    }
+    return __v;
+  }
+
+  static _Derived __add(const _Derived& __a, const _Derived& __b) {
+    _Derived __v;
+    for (size_t __i = 0; __i < _Derived::__size(); __i++) {
+      __v.__set(__i, __a.__get(__i) + __b.__get(__i));
+    }
+    return __v;
+  }
+
+  static _Derived __sub(const _Derived& __a, const _Derived& __b) {
+    _Derived __v;
+    for (size_t __i = 0; __i < _Derived::__size(); __i++) {
+      __v.__set(__i, __a.__get(__i) - __b.__get(__i));
+    }
+    return __v;
+  }
+
+  static _Derived __mul(const _Derived& __a, const _Derived& __b) {
+    _Derived __v;
+    for (size_t __i = 0; __i < _Derived::__size(); __i++) {
+      __v.__set(__i, __a.__get(__i) * __b.__get(__i));
+    }
+    return __v;
+  }
+
+  static _Derived __div(const _Derived& __a, const _Derived& __b) {
+    _Derived __v;
+    for (size_t __i = 0; __i < _Derived::__size(); __i++) {
+      __v.__set(__i, __a.__get(__i) / __b.__get(__i));
+    }
+    return __v;
+  }
+
+  static _Derived __mod(const _Derived& __a, const _Derived& __b) {
+    _Derived __v;
+    for (size_t __i = 0; __i < _Derived::__size(); __i++) {
+      __v.__set(__i, __a.__get(__i) % __b.__get(__i));
+    }
+    return __v;
+  }
+
+  static _Derived __and(const _Derived& __a, const _Derived& __b) {
+    _Derived __v;
+    for (size_t __i = 0; __i < _Derived::__size(); __i++) {
+      __v.__set(__i, __a.__get(__i) & __b.__get(__i));
+    }
+    return __v;
+  }
+
+  static _Derived __or(const _Derived& __a, const _Derived& __b) {
+    _Derived __v;
+    for (size_t __i = 0; __i < _Derived::__size(); __i++) {
+      __v.__set(__i, __a.__get(__i) | __b.__get(__i));
+    }
+    return __v;
+  }
+
+  static _Derived __not(const _Derived& __a) {
+    _Derived __v;
+    for (size_t __i = 0; __i < _Derived::__size(); __i++) {
+      __v.__set(__i, ~__a.__get(__i));
+    }
+    return __v;
+  }
+
+  static _Derived __xor(const _Derived& __a, const _Derived& __b) {
+    _Derived __v;
+    for (size_t __i = 0; __i < _Derived::__size(); __i++) {
+      __v.__set(__i, __a.__get(__i) ^ __b.__get(__i));
+    }
+    return __v;
+  }
+
+  static _Derived __shl(const _Derived& __a, const _Derived& __b) {
+    _Derived __v;
+    for (size_t __i = 0; __i < _Derived::__size(); __i++) {
+      __v.__set(__i, __a.__get(__i) << __b.__get(__i));
+    }
+    return __v;
+  }
+
+  static _Derived __shr(const _Derived& __a, const _Derived& __b) {
+    _Derived __v;
+    for (size_t __i = 0; __i < _Derived::__size(); __i++) {
+      __v.__set(__i, __a.__get(__i) >> __b.__get(__i));
+    }
+    return __v;
+  }
+
+  template <class _InputSimd>
+  static _Derived __cmp_eq(const _InputSimd& __a, const _InputSimd& __b) {
+    _Derived __v;
+    for (size_t __i = 0; __i < _Derived::__size(); __i++) {
+      __v.__set(__i, __a.__get(__i) == __b.__get(__i) ? -1 : 0);
+    }
+    return __v;
+  }
+
+  template <class _InputSimd>
+  static _Derived __cmp_ne(const _InputSimd& __a, const _InputSimd& __b) {
+    _Derived __v;
+    for (size_t __i = 0; __i < _Derived::__size(); __i++) {
+      __v.__set(__i, __a.__get(__i) != __b.__get(__i) ? -1 : 0);
+    }
+    return __v;
+  }
+
+  template <class _InputSimd>
+  static _Derived __cmp_le(const _InputSimd& __a, const _InputSimd& __b) {
+    _Derived __v;
+    for (size_t __i = 0; __i < _Derived::__size(); __i++) {
+      __v.__set(__i, __a.__get(__i) <= __b.__get(__i) ? -1 : 0);
+    }
+    return __v;
+  }
+
+  template <class _InputSimd>
+  static _Derived __cmp_ge(const _InputSimd& __a, const _InputSimd& __b) {
+    _Derived __v;
+    for (size_t __i = 0; __i < _Derived::__size(); __i++) {
+      __v.__set(__i, __a.__get(__i) >= __b.__get(__i) ? -1 : 0);
+    }
+    return __v;
+  }
+
+  template <class _InputSimd>
+  static _Derived __cmp_lt(const _InputSimd& __a, const _InputSimd& __b) {
+    _Derived __v;
+    for (size_t __i = 0; __i < _Derived::__size(); __i++) {
+      __v.__set(__i, __a.__get(__i) < __b.__get(__i) ? -1 : 0);
+    }
+    return __v;
+  }
+
+  template <class _InputSimd>
+  static _Derived __cmp_gt(const _InputSimd& __a, const _InputSimd& __b) {
+    _Derived __v;
+    for (size_t __i = 0; __i < _Derived::__size(); __i++) {
+      __v.__set(__i, __a.__get(__i) > __b.__get(__i) ? -1 : 0);
+    }
+    return __v;
+  }
+};
+
 template <class _Tp, class _Abi>
 class __simd_storage {};
 
 template <class _Tp, int __num_element>
-class __simd_storage<_Tp, __simd_abi<_StorageKind::_Array, __num_element>> {
+class __simd_storage<_Tp, __simd_abi<_StorageKind::_Array, __num_element>>
+    : public __simd_storage_base<__simd_storage<
+          _Tp, __simd_abi<_StorageKind::_Array, __num_element>>> {
   std::array<_Tp, __num_element> __storage_;
 
   template <class, class>
@@ -638,15 +793,19 @@
 public:
   using __raw_type = std::array<_Tp, __num_element>;
 
+  static constexpr size_t __size() { return __num_element; }
+
   __simd_storage() = default;
   void __assign(__raw_type __raw) { __storage_ = __raw; }
   __raw_type __raw() const { return __storage_; }
   _Tp __get(size_t __index) const { return __storage_[__index]; };
   void __set(size_t __index, _Tp __val) { __storage_[__index] = __val; }
 };
 
 template <class _Tp>
-class __simd_storage<_Tp, __simd_abi<_StorageKind::_Scalar, 1>> {
+class __simd_storage<_Tp, __simd_abi<_StorageKind::_Scalar, 1>>
+    : public __simd_storage_base<
+          __simd_storage<_Tp, __simd_abi<_StorageKind::_Scalar, 1>>> {
   _Tp __storage_;
 
   template <class, class>
@@ -658,6 +817,8 @@
 public:
   using __raw_type = _Tp;
 
+  static constexpr size_t __size() { return 1; }
+
   __simd_storage() = default;
   void __assign(__raw_type __raw) { __storage_ = __raw; }
   __raw_type __raw() const { return __storage_; }
@@ -780,7 +941,9 @@
 #endif
 
 template <class _Tp, int __num_element>
-class __simd_storage<_Tp, __simd_abi<_StorageKind::_VecExt, __num_element>> {
+class __simd_storage<_Tp, __simd_abi<_StorageKind::_VecExt, __num_element>>
+    : public __simd_storage_base<__simd_storage<
+          _Tp, __simd_abi<_StorageKind::_VecExt, __num_element>>> {
   using _StorageType =
       typename __vec_ext_traits<_Tp, sizeof(_Tp) * __num_element>::type;
 
@@ -792,14 +955,107 @@
   template <class, class>
   friend struct simd_mask;
 
+  __simd_storage(_StorageType __s) : __storage_(__s) {}
+
 public:
   using __raw_type = _StorageType;
 
+  static constexpr size_t __size() { return __num_element; }
+
   __simd_storage() = default;
   void __assign(__raw_type __raw) { __storage_ = __raw; }
   __raw_type __raw() const { return __storage_; }
+
   _Tp __get(size_t __index) const { return __storage_[__index]; };
   void __set(size_t __index, _Tp __val) { __storage_[__index] = __val; }
+
+  static __simd_storage __neg(const __simd_storage& __a) {
+    return __simd_storage(-__a.__storage_);
+  }
+
+  static __simd_storage __add(const __simd_storage& __a,
+                              const __simd_storage& __b) {
+    return __simd_storage(__a.__storage_ + __b.__storage_);
+  }
+
+  static __simd_storage __sub(const __simd_storage& __a,
+                              const __simd_storage& __b) {
+    return __simd_storage(__a.__storage_ - __b.__storage_);
+  }
+
+  static __simd_storage __mul(const __simd_storage& __a,
+                              const __simd_storage& __b) {
+    return __simd_storage(__a.__storage_ * __b.__storage_);
+  }
+
+  static __simd_storage __div(const __simd_storage& __a,
+                              const __simd_storage& __b) {
+    return __simd_storage(__a.__storage_ / __b.__storage_);
+  }
+
+  static __simd_storage __mod(const __simd_storage& __a,
+                              const __simd_storage& __b) {
+    return __simd_storage(__a.__storage_ % __b.__storage_);
+  }
+
+  static __simd_storage __and(const __simd_storage& __a,
+                              const __simd_storage& __b) {
+    return __simd_storage(__a.__storage_ & __b.__storage_);
+  }
+
+  static __simd_storage __or(const __simd_storage& __a,
+                             const __simd_storage& __b) {
+    return __simd_storage(__a.__storage_ | __b.__storage_);
+  }
+
+  static __simd_storage __xor(const __simd_storage& __a,
+                              const __simd_storage& __b) {
+    return __simd_storage(__a.__storage_ ^ __b.__storage_);
+  }
+
+  static __simd_storage __not(const __simd_storage& __a) {
+    return __simd_storage(~__a.__storage_);
+  }
+
+  static __simd_storage __shl(const __simd_storage& __a,
+                              const __simd_storage& __b) {
+    return __simd_storage(__a.__storage_ << __b.__storage_);
+  }
+
+  static __simd_storage __shr(const __simd_storage& __a,
+                              const __simd_storage& __b) {
+    return __simd_storage(__a.__storage_ >> __b.__storage_);
+  }
+
+  template <class _InputSimd>
+  static __simd_storage __cmp_eq(const _InputSimd& __a, const _InputSimd& __b) {
+    return __simd_storage(__a.__raw() == __b.__raw());
+  }
+
+  template <class _InputSimd>
+  static __simd_storage __cmp_ne(const _InputSimd& __a, const _InputSimd& __b) {
+    return __simd_storage(__a.__raw() != __b.__raw());
+  }
+
+  template <class _InputSimd>
+  static __simd_storage __cmp_le(const _InputSimd& __a, const _InputSimd& __b) {
+    return __simd_storage(__a.__raw() <= __b.__raw());
+  }
+
+  template <class _InputSimd>
+  static __simd_storage __cmp_ge(const _InputSimd& __a, const _InputSimd& __b) {
+    return __simd_storage(__a.__raw() >= __b.__raw());
+  }
+
+  template <class _InputSimd>
+  static __simd_storage __cmp_lt(const _InputSimd& __a, const _InputSimd& __b) {
+    return __simd_storage(__a.__raw() < __b.__raw());
+  }
+
+  template <class _InputSimd>
+  static __simd_storage __cmp_gt(const _InputSimd& __a, const _InputSimd& __b) {
+    return __simd_storage(__a.__raw() > __b.__raw());
+  }
 };
 
 #endif // _LIBCPP_HAS_NO_VECTOR_EXTENSION
@@ -1901,6 +2157,9 @@
     return simd_size<_Tp, _Abi>::value;
   }
 
+  template <class, class>
+  friend class simd;
+
   template <class, class>
   friend class simd_mask;
 
@@ -1959,6 +2218,50 @@
     }
   }
 
+  static simd __from_storage(__simd_storage<_Tp, _Abi> __s) {
+    simd __v;
+    __v.__s_ = __s;
+    return __v;
+  }
+
+  // The relational operators are implemented here, rather than in the friend
+  // definitions below, because of how `friend` works. See CWG 1699.
+  static mask_type __cmp_eq_impl(const simd& __a, const simd& __b) {
+    using __element = typename __unsigned_traits<sizeof(_Tp)>::type;
+    return mask_type(simd<__element, _Abi>::__from_storage(
+        __simd_storage<__element, _Abi>::__cmp_eq(__a.__s_, __b.__s_)));
+  }
+
+  static mask_type __cmp_ne_impl(const simd& __a, const simd& __b) {
+    using __element = typename __unsigned_traits<sizeof(_Tp)>::type;
+    return mask_type(simd<__element, _Abi>::__from_storage(
+        __simd_storage<__element, _Abi>::__cmp_ne(__a.__s_, __b.__s_)));
+  }
+
+  static mask_type __cmp_ge_impl(const simd& __a, const simd& __b) {
+    using __element = typename __unsigned_traits<sizeof(_Tp)>::type;
+    return mask_type(simd<__element, _Abi>::__from_storage(
+        __simd_storage<__element, _Abi>::__cmp_ge(__a.__s_, __b.__s_)));
+  }
+
+  static mask_type __cmp_le_impl(const simd& __a, const simd& __b) {
+    using __element = typename __unsigned_traits<sizeof(_Tp)>::type;
+    return mask_type(simd<__element, _Abi>::__from_storage(
+        __simd_storage<__element, _Abi>::__cmp_le(__a.__s_, __b.__s_)));
+  }
+
+  static mask_type __cmp_gt_impl(const simd& __a, const simd& __b) {
+    using __element = typename __unsigned_traits<sizeof(_Tp)>::type;
+    return mask_type(simd<__element, _Abi>::__from_storage(
+        __simd_storage<__element, _Abi>::__cmp_gt(__a.__s_, __b.__s_)));
+  }
+
+  static mask_type __cmp_lt_impl(const simd& __a, const simd& __b) {
+    using __element = typename __unsigned_traits<sizeof(_Tp)>::type;
+    return mask_type(simd<__element, _Abi>::__from_storage(
+        __simd_storage<__element, _Abi>::__cmp_lt(__a.__s_, __b.__s_)));
+  }
+
 public:
   // implicit type conversion constructor
   template <class _Up,
@@ -2067,98 +2370,56 @@
   mask_type operator!() const { return *this == simd(0); }
 
   simd operator~() const {
-    simd __v;
-    for (size_t __i = 0; __i < size(); __i++) {
-      __v[__i] = ~(*this)[__i];
-    }
-    return __v;
+    return __from_storage(__simd_storage<_Tp, _Abi>::__not(this->__s_));
   }
 
   simd operator+() const { return *this; }
 
-  simd operator-() const { return simd(0) - *this; }
+  simd operator-() const {
+    return __from_storage(__simd_storage<_Tp, _Abi>::__neg(this->__s_));
+  }
 
   // binary operators [simd.binary]
   // TODO: regarding NOTE 9, the implementationn chooses not to SFINAE,
   // but causes a hard error when the operator can't work on _Tp.
   friend simd operator+(const simd& __a, const simd& __b) {
-    simd __v;
-    for (size_t __i = 0; __i < __v.size(); __i++) {
-      __v[__i] = __a[__i] + __b[__i];
-    }
-    return __v;
+    return __from_storage(__simd_storage<_Tp, _Abi>::__add(__a.__s_, __b.__s_));
   }
 
   friend simd operator-(const simd& __a, const simd& __b) {
-    simd __v;
-    for (size_t __i = 0; __i < __v.size(); __i++) {
-      __v[__i] = __a[__i] - __b[__i];
-    }
-    return __v;
+    return __from_storage(__simd_storage<_Tp, _Abi>::__sub(__a.__s_, __b.__s_));
   }
 
   friend simd operator*(const simd& __a, const simd& __b) {
-    simd __v;
-    for (size_t __i = 0; __i < __v.size(); __i++) {
-      __v[__i] = __a[__i] * __b[__i];
-    }
-    return __v;
+    return __from_storage(__simd_storage<_Tp, _Abi>::__mul(__a.__s_, __b.__s_));
   }
 
   friend simd operator/(const simd& __a, const simd& __b) {
-    simd __v;
-    for (size_t __i = 0; __i < __v.size(); __i++) {
-      __v[__i] = __a[__i] / __b[__i];
-    }
-    return __v;
+    return __from_storage(__simd_storage<_Tp, _Abi>::__div(__a.__s_, __b.__s_));
   }
 
   friend simd operator%(const simd& __a, const simd& __b) {
-    simd __v;
-    for (size_t __i = 0; __i < __v.size(); __i++) {
-      __v[__i] = __a[__i] % __b[__i];
-    }
-    return __v;
+    return __from_storage(__simd_storage<_Tp, _Abi>::__mod(__a.__s_, __b.__s_));
   }
 
   friend simd operator&(const simd& __a, const simd& __b) {
-    simd __v;
-    for (size_t __i = 0; __i < __v.size(); __i++) {
-      __v[__i] = __a[__i] & __b[__i];
-    }
-    return __v;
+    return __from_storage(__simd_storage<_Tp, _Abi>::__and(__a.__s_, __b.__s_));
   }
 
   friend simd operator|(const simd& __a, const simd& __b) {
-    simd __v;
-    for (size_t __i = 0; __i < __v.size(); __i++) {
-      __v[__i] = __a[__i] | __b[__i];
-    }
-    return __v;
+    return __from_storage(__simd_storage<_Tp, _Abi>::__or(__a.__s_, __b.__s_));
   }
 
   friend simd operator^(const simd& __a, const simd& __b) {
-    simd __v;
-    for (size_t __i = 0; __i < __v.size(); __i++) {
-      __v[__i] = __a[__i] ^ __b[__i];
-    }
-    return __v;
+    return __from_storage(__simd_storage<_Tp, _Abi>::__xor(__a.__s_, __b.__s_));
   }
 
   friend simd operator<<(const simd& __a, const simd& __b) {
-    simd __v;
-    for (size_t __i = 0; __i < __v.size(); __i++) {
-      __v[__i] = __a[__i] << __b[__i];
-    }
-    return __v;
+    return __from_storage(__simd_storage<_Tp, _Abi>::__shl(__a.__s_, __b.__s_));
   }
 
   friend simd operator>>(const simd& __a, const simd& __b) {
-    simd __v;
-    for (size_t __i = 0; __i < __v.size(); __i++) {
-      __v[__i] = __a[__i] >> __b[__i];
-    }
-    return __v;
+    return __from_storage(__simd_storage<_Tp, _Abi>::__shr(__a.__s_, __b.__s_));
   }
 
   friend simd operator<<(const simd& __a, int __offset) {
@@ -2219,51 +2480,27 @@
 
   // compares [simd.comparison]
   friend mask_type operator==(const simd& __a, const simd& __b) {
-    mask_type __mask;
-    for (size_t __i = 0; __i < __a.size(); __i++) {
-      __mask[__i] = __a[__i] == __b[__i];
-    }
-    return __mask;
+    return __cmp_eq_impl(__a, __b);
   }
 
   friend mask_type operator!=(const simd& __a, const simd& __b) {
-    mask_type __mask;
-    for (size_t __i = 0; __i < __a.size(); __i++) {
-      __mask[__i] = __a[__i] != __b[__i];
-    }
-    return __mask;
+    return __cmp_ne_impl(__a, __b);
   }
 
   friend mask_type operator>=(const simd& __a, const simd& __b) {
-    mask_type __mask;
-    for (size_t __i = 0; __i < __a.size(); __i++) {
-      __mask[__i] = __a[__i] >= __b[__i];
-    }
-    return __mask;
+    return __cmp_ge_impl(__a, __b);
   }
 
   friend mask_type operator<=(const simd& __a, const simd& __b) {
-    mask_type __mask;
-    for (size_t __i = 0; __i < __a.size(); __i++) {
-      __mask[__i] = __a[__i] <= __b[__i];
-    }
-    return __mask;
+    return __cmp_le_impl(__a, __b);
   }
 
   friend mask_type operator>(const simd& __a, const simd& __b) {
-    mask_type __mask;
-    for (size_t __i = 0; __i < __a.size(); __i++) {
-      __mask[__i] = __a[__i] > __b[__i];
-    }
-    return __mask;
+    return __cmp_gt_impl(__a, __b);
   }
 
   friend mask_type operator<(const simd& __a, const simd& __b) {
-    mask_type __mask;
-    for (size_t __i = 0; __i < __a.size(); __i++) {
-      __mask[__i] = __a[__i] < __b[__i];
-    }
-    return __mask;
+    return __cmp_lt_impl(__a, __b);
   }
 
 #if !defined(_LIBCPP_HAS_NO_VECTOR_EXTENSION) && defined(_LIBCPP_COMPILER_CLANG)
@@ -2287,6 +2524,9 @@
 
   friend struct __simd_mask_friend;
 
+  template <class, class>
+  friend class simd;
+
   // Use a non-member function, only because Clang 3.8 crashes with a member function.
   template <size_t __alignment>
   static void __copy_from_impl(simd_mask* __mask, const bool* __buffer

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D44659: [libcxx] Optimize -O0 performance for operators

Reply via email to