[PATCH] D24961: [avx512] Add aliases to some missing avx512 intrinsics.

2016-09-27 Thread Ayman Musa via cfe-commits
aymanmus created this revision.
aymanmus added reviewers: m_zuckerman, igorb, delena.
aymanmus added a subscriber: cfe-commits.

- "//_mm512_cmp_pd_mask//" Intrinsics, where  = {eq, le, lt, neq, nle, 
nlt, ord, unord}.
- //_mm512_cvtepi32lo_pd, _mm512_mask_cvtepi32lo_pd, _mm512_cvtepu32lo_pd, 
_mm512_mask_cvtepu32lo_pd//
- //_mm512_cvtpd_pslo, _mm512_mask_cvtpd_pslo, _mm512_cvtpslo_pd, 
_mm512_mask_cvtpslo_pd//
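
A short usage sketch of the new aliases (helper names are illustrative, not
part of the patch; compile with -mavx512f). Each compare alias forwards to the
generic _mm512_cmp_ps_mask/_mm512_cmp_pd_mask with a fixed predicate, and the
"lo" conversions operate on the low 256-bit half of their input:

#include <immintrin.h>

__mmask8 cmp_alias_demo(__m512d a, __m512d b, __mmask8 m)
{
  /* _mm512_cmpeq_pd_mask(a, b) expands to _mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ). */
  __mmask8 keq  = _mm512_cmpeq_pd_mask(a, b);
  __mmask8 knlt = _mm512_mask_cmpnlt_pd_mask(m, a, b);  /* masked form, _CMP_NLT_US */
  return keq & knlt;
}

__m512d cvt_lo_demo(__m512i v, __m512d src, __mmask8 m)
{
  __m512d lo     = _mm512_cvtepi32lo_pd(v);               /* low 8 x i32 -> 8 x double */
  __m512d merged = _mm512_mask_cvtepi32lo_pd(src, m, v);  /* clear mask bits keep src  */
  return _mm512_add_pd(lo, merged);
}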

https://reviews.llvm.org/D24961

Files:
  lib/Headers/avx512fintrin.h
  test/CodeGen/avx512f-builtins.c

Index: lib/Headers/avx512fintrin.h
===
--- lib/Headers/avx512fintrin.h
+++ lib/Headers/avx512fintrin.h
@@ -3555,10 +3555,49 @@
 
 #define _mm512_cmp_ps_mask(A, B, P) \
   _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
-
 #define _mm512_mask_cmp_ps_mask(U, A, B, P) \
   _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
 
+#define _mm512_cmpeq_ps_mask(A, B) \
+_mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
+#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
+_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)
+
+#define _mm512_cmplt_ps_mask(A, B) \
+_mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
+#define _mm512_mask_cmplt_ps_mask(k, A, B) \
+_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)
+
+#define _mm512_cmple_ps_mask(A, B) \
+_mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
+#define _mm512_mask_cmple_ps_mask(k, A, B) \
+_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)
+
+#define _mm512_cmpunord_ps_mask(A, B) \
+_mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
+#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
+_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)
+
+#define _mm512_cmpneq_ps_mask(A, B) \
+_mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
+#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
+_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)
+
+#define _mm512_cmpnlt_ps_mask(A, B) \
+_mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
+#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
+_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)
+
+#define _mm512_cmpnle_ps_mask(A, B) \
+_mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
+#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
+_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)
+
+#define _mm512_cmpord_ps_mask(A, B) \
+_mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
+#define _mm512_mask_cmpord_ps_mask(k, A, B) \
+_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
+
 #define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
   (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
  (__v8df)(__m512d)(B), (int)(P), \
@@ -3571,10 +3610,49 @@
 
 #define _mm512_cmp_pd_mask(A, B, P) \
   _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
-
 #define _mm512_mask_cmp_pd_mask(U, A, B, P) \
   _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
 
+#define _mm512_cmpeq_pd_mask(A, B) \
+_mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
+#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
+_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)
+
+#define _mm512_cmplt_pd_mask(A, B) \
+_mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
+#define _mm512_mask_cmplt_pd_mask(k, A, B) \
+_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)
+
+#define _mm512_cmple_pd_mask(A, B) \
+_mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
+#define _mm512_mask_cmple_pd_mask(k, A, B) \
+_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)
+
+#define _mm512_cmpunord_pd_mask(A, B) \
+_mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
+#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
+_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)
+
+#define _mm512_cmpneq_pd_mask(A, B) \
+_mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
+#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
+_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)
+
+#define _mm512_cmpnlt_pd_mask(A, B) \
+_mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
+#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
+_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)
+
+#define _mm512_cmpnle_pd_mask(A, B) \
+_mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
+#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
+_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)
+
+#define _mm512_cmpord_pd_mask(A, B) \
+_mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
+#define _mm512_mask_cmpord_pd_mask(k, A, B) \
+_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
+
 /* Conversion */
 
 #define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \
@@ -3703,6 +3781,18 @@
 (__mmask8) __U);
 }
 
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_cvtepi32lo_pd(__m512i __A)
+{
+  return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
+{
+  return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
+}
+
 static __inline

r282488 - [avx512] Add aliases to some missing avx512 intrinsics.

2016-09-27 Thread Ayman Musa via cfe-commits
Author: aymanmus
Date: Tue Sep 27 09:06:32 2016
New Revision: 282488

URL: http://llvm.org/viewvc/llvm-project?rev=282488&view=rev
Log:
[avx512] Add aliases to some missing avx512 intrinsics.

Differential Revision: https://reviews.llvm.org/D24961

Modified:
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/test/CodeGen/avx512f-builtins.c

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=282488&r1=282487&r2=282488&view=diff
==
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Tue Sep 27 09:06:32 2016
@@ -3555,10 +3555,49 @@ _mm512_mask_blend_epi32(__mmask16 __U, _
 
 #define _mm512_cmp_ps_mask(A, B, P) \
   _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
-
 #define _mm512_mask_cmp_ps_mask(U, A, B, P) \
   _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
 
+#define _mm512_cmpeq_ps_mask(A, B) \
+_mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
+#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
+_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)
+
+#define _mm512_cmplt_ps_mask(A, B) \
+_mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
+#define _mm512_mask_cmplt_ps_mask(k, A, B) \
+_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)
+
+#define _mm512_cmple_ps_mask(A, B) \
+_mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
+#define _mm512_mask_cmple_ps_mask(k, A, B) \
+_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)
+
+#define _mm512_cmpunord_ps_mask(A, B) \
+_mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
+#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
+_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)
+
+#define _mm512_cmpneq_ps_mask(A, B) \
+_mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
+#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
+_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)
+
+#define _mm512_cmpnlt_ps_mask(A, B) \
+_mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
+#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
+_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)
+
+#define _mm512_cmpnle_ps_mask(A, B) \
+_mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
+#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
+_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)
+
+#define _mm512_cmpord_ps_mask(A, B) \
+_mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
+#define _mm512_mask_cmpord_ps_mask(k, A, B) \
+_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
+
 #define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \
   (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
  (__v8df)(__m512d)(B), (int)(P), \
@@ -3571,10 +3610,49 @@ _mm512_mask_blend_epi32(__mmask16 __U, _
 
 #define _mm512_cmp_pd_mask(A, B, P) \
   _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
-
 #define _mm512_mask_cmp_pd_mask(U, A, B, P) \
   _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
 
+#define _mm512_cmpeq_pd_mask(A, B) \
+_mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
+#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
+_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)
+
+#define _mm512_cmplt_pd_mask(A, B) \
+_mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
+#define _mm512_mask_cmplt_pd_mask(k, A, B) \
+_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)
+
+#define _mm512_cmple_pd_mask(A, B) \
+_mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
+#define _mm512_mask_cmple_pd_mask(k, A, B) \
+_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)
+
+#define _mm512_cmpunord_pd_mask(A, B) \
+_mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
+#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
+_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)
+
+#define _mm512_cmpneq_pd_mask(A, B) \
+_mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
+#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
+_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)
+
+#define _mm512_cmpnlt_pd_mask(A, B) \
+_mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
+#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
+_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)
+
+#define _mm512_cmpnle_pd_mask(A, B) \
+_mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
+#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
+_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)
+
+#define _mm512_cmpord_pd_mask(A, B) \
+_mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
+#define _mm512_mask_cmpord_pd_mask(k, A, B) \
+_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
+
 /* Conversion */
 
 #define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \
@@ -3703,6 +3781,18 @@ _mm512_maskz_cvtepi32_pd (__mmask8 __U,
 (__mmask8) __U);
 }
 
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_cvtepi32lo_pd(__m512i __A)
+{
+  return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi32lo_

r282492 - Update to commit r282488, fix the buildbot failure.

2016-09-27 Thread Ayman Musa via cfe-commits
Author: aymanmus
Date: Tue Sep 27 10:37:31 2016
New Revision: 282492

URL: http://llvm.org/viewvc/llvm-project?rev=282492&view=rev
Log:
Update to commit r282488, fix the buildbot failure.

Modified:
cfe/trunk/lib/Headers/avx512fintrin.h

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=282492&r1=282491&r2=282492&view=diff
==
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Tue Sep 27 10:37:31 2016
@@ -9381,13 +9381,13 @@ _mm512_maskz_cvtps_pd (__mmask8 __U, __m
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_cvtpslo_pd (__m512d __A)
+_mm512_cvtpslo_pd (__m512 __A)
 {
   return (__m512) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512d __A)
+_mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
 {
   return (__m512) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
 }
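
For reference, a minimal sketch of the operation behind the corrected
signature: the argument is a packed-float __m512 (not __m512d), and only its
low 256-bit half is widened to double precision. The helper below is
hypothetical and simply mirrors the body of _mm512_cvtpslo_pd; compile with
-mavx512f:

#include <immintrin.h>

/* Hypothetical helper (not from the commit): widen the low 8 floats of a
   512-bit packed-float vector to 8 doubles. */
static inline __m512d widen_low_ps(__m512 v)
{
  return _mm512_cvtps_pd(_mm512_castps512_ps256(v));
}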




[PATCH] D26021: [X86][AVX512][Clang] Add support for mask_{move|store|load}_s{s/d} and int2mask/mask2int intrinsics.

2016-11-08 Thread Ayman Musa via cfe-commits
aymanmus updated the summary for this revision.
aymanmus updated this revision to Diff 77158.
aymanmus added a comment.

Fix spaces and indentations.


https://reviews.llvm.org/D26021
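
For context, a hedged usage sketch of the int2mask/mask2int and masked scalar
move intrinsics added here (the helper name and arguments are illustrative,
not from the patch; compile with -mavx512f):

#include <immintrin.h>

/* _mm_mask_move_ss keeps elements 1..3 of 'a' and selects the low element
   from 'b' when bit 0 of the mask is set, otherwise from 'w'. */
__m128 select_low(__m128 w, __m128 a, __m128 b, int bits)
{
  __mmask16 k = _mm512_int2mask(bits);         /* int -> __mmask16           */
  __mmask8  u = (__mmask8)_mm512_mask2int(k);  /* __mmask16 -> int, narrowed */
  return _mm_mask_move_ss(w, u, a, b);
}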

Files:
  include/clang/Basic/BuiltinsX86.def
  lib/CodeGen/CGBuiltin.cpp
  lib/Headers/avx512fintrin.h
  test/CodeGen/avx512f-builtins.c

Index: lib/Headers/avx512fintrin.h
===
--- lib/Headers/avx512fintrin.h
+++ lib/Headers/avx512fintrin.h
@@ -516,6 +516,18 @@
   return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
 }
 
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+_mm512_int2mask(int __a)
+{
+  return (__mmask16)__a;
+}
+
+static __inline__ int __DEFAULT_FN_ATTRS
+_mm512_mask2int(__mmask16 __a)
+{
+  return (int)__a;
+}
+
 /* Bitwise operators */
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_and_epi32(__m512i __a, __m512i __b)
@@ -9107,35 +9119,96 @@
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
-  return (__m128) __builtin_ia32_movss_mask ((__v4sf) __A, (__v4sf) __B,
-   (__v4sf) __W,
-   (__mmask8) __U);
+  __m128 res = __A; 
+  res[0] = (__U & 1) ? __B[0] : __W[0];
+  return res; 
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
 {
-  return (__m128) __builtin_ia32_movss_mask ((__v4sf) __A, (__v4sf) __B,
-   (__v4sf)
-   _mm_setzero_si128(),
-   (__mmask8) __U);
+  __m128 res = __A; 
+  res[0] = (__U & 1) ? __B[0] : 0; 
+  return res; 
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
 {
-  return (__m128d) __builtin_ia32_movsd_mask ((__v2df) __A, (__v2df) __B,
-   (__v2df) __W,
-   (__mmask8) __U);
+  __m128d res = __A; 
+  res[0] = (__U & 1) ? __B[0] : __W[0];
+  return res; 
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
 {
-  return (__m128d) __builtin_ia32_movsd_mask ((__v2df) __A, (__v2df) __B,
-   (__v2df)
-   _mm_setzero_pd (),
-   (__mmask8) __U);
+  __m128d res = __A; 
+  res[0] = (__U & 1) ? __B[0] : 0; 
+  return res; 
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
+{
+  __builtin_ia32_storess128_mask ((__v16sf *)__W, 
+(__v16sf) _mm512_castps128_ps512(__A),
+(__mmask16) __U & (__mmask16)1);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
+{
+  __builtin_ia32_storesd128_mask ((__v8df *)__W, 
+(__v8df) _mm512_castpd128_pd512(__A),
+(__mmask8) __U & 1);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
+{
+  __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
+(__v4sf) {0.0, 0.0, 0.0, 0.0},
+0, 4, 4, 4);
+
+  return (__m128) __builtin_shufflevector(
+   __builtin_ia32_loadss128_mask ((__v16sf *) __A,
+  (__v16sf) _mm512_castps128_ps512(src),
+  (__mmask16) __U & 1),
+   _mm512_undefined_ps(), 0, 1, 2, 3);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_load_ss (__mmask8 __U, const float* __A)
+{
+  return (__m128) __builtin_shufflevector(
+   __builtin_ia32_loadss128_mask ((__v16sf *) __A,
+  (__v16sf) _mm512_setzero_ps(),
+  (__mmask16) __U & 1),
+   _mm512_undefined_ps(), 0, 1, 2, 3);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
+{
+  __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
+ (__v2df) {0.0, 0.0}, 0, 2);
+
+  return (__m128d) __builtin_shufflevector(
+__builtin_ia32_loadsd128_mask ((__v8df *) __A,
+  (__v8df) _mm512_castpd128_pd512(src),
+  (__mmask8) __U & 1),
+_mm512_undefined_pd(), 0, 1);
+}
+
+static __inline__ __m128d __DEFAULT_FN_ATTRS
+_mm_maskz_load_sd (__mmask8 __U, const double* __A)
+{
+  return (__m128d) __builtin_shufflevector(
+__builtin_ia32_loadsd128_mask ((__v8df *) __A,
+  (__v8df) _mm512_setzero_pd(),
+  (__mmask8) __U & 1),
+_mm512_undefined_pd(), 0, 1);
 }
 
 #define _mm512_shuffle_epi32(A, I) __extension__ ({ \
Index: lib/CodeGen/CGBuiltin.cp

r286229 - [X86][AVX512][Clang] Add support for mask_{move|store|load}_s{s/d} and int2mask/mask2int intrinsics.

2016-11-08 Thread Ayman Musa via cfe-commits
Author: aymanmus
Date: Tue Nov  8 06:00:30 2016
New Revision: 286229

URL: http://llvm.org/viewvc/llvm-project?rev=286229&view=rev
Log:
[X86][AVX512][Clang] Add support for mask_{move|store|load}_s{s/d} and 
int2mask/mask2int intrinsics.

Differential Revision: https://reviews.llvm.org/D26021
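
As a quick illustration of the new scalar masked load/store intrinsics (a
hedged sketch, not taken from the commit; compile with -mavx512f):

#include <immintrin.h>

/* Stores v[0] to *dst only when bit 0 of k is set, then reloads it with a
   zero-masked load; the result is {k & 1 ? *dst : 0, 0, 0, 0}. */
__m128 store_then_reload(float *dst, __mmask8 k, __m128 v)
{
  _mm_mask_store_ss(dst, k, v);
  return _mm_maskz_load_ss(k, dst);
}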


Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/test/CodeGen/avx512f-builtins.c

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=286229&r1=286228&r2=286229&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Tue Nov  8 06:00:30 2016
@@ -1448,8 +1448,10 @@ TARGET_BUILTIN(__builtin_ia32_fixupimmps
 TARGET_BUILTIN(__builtin_ia32_fixupimmps256_mask, "V8fV8fV8fV8iIiUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_fixupimmps256_maskz, "V8fV8fV8fV8iIiUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loadapd128_mask, "V2dV2d*V2dUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_loadsd128_mask, "V8dV8d*V8dUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_loadapd256_mask, "V4dV4d*V4dUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loadaps128_mask, "V4fV4f*V4fUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_loadss128_mask, "V16fV16f*V16fUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_loadaps256_mask, "V8fV8f*V8fUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loaddqudi128_mask, "V2LLiV2LLi*V2LLiUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loaddqudi256_mask, "V4LLiV4LLi*V4LLiUc","","avx512vl")
@@ -1466,8 +1468,10 @@ TARGET_BUILTIN(__builtin_ia32_storedquhi
 TARGET_BUILTIN(__builtin_ia32_storedquqi128_mask, "vV16c*V16cUs","","avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_storedquqi256_mask, "vV32c*V32cUi","","avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_storeapd128_mask, "vV2d*V2dUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_storesd128_mask, "vV8d*V8dUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_storeapd256_mask, "vV4d*V4dUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_storeaps128_mask, "vV4f*V4fUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_storess128_mask, "vV16f*V16fUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_storeaps256_mask, "vV8f*V8fUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_storedqudi128_mask, "vV2LLi*V2LLiUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_storedqudi256_mask, "vV4LLi*V4LLiUc","","avx512vl")
@@ -1790,8 +1794,6 @@ TARGET_BUILTIN(__builtin_ia32_expandload
 TARGET_BUILTIN(__builtin_ia32_expandsf512_mask, "V16fV16fV16fUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_expandsi512_mask, "V16iV16iV16iUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvtps2pd512_mask, "V8dV8fV8dUcIi","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_movss_mask, "V4fV4fV4fV4fUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_movsd_mask, "V2dV2dV2dV2dUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_compressstoredf512_mask, "vV8d*V8dUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_compressstoredi512_mask, "vV8LLi*V8LLiUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_compressstoresf512_mask, "vV16f*V16fUs","","avx512f")

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=286229&r1=286228&r2=286229&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Tue Nov  8 06:00:30 2016
@@ -7386,6 +7386,10 @@ Value *CodeGenFunction::EmitX86BuiltinEx
   case X86::BI__builtin_ia32_storeups512_mask:
 return EmitX86MaskedStore(*this, Ops, 1);
 
+  case X86::BI__builtin_ia32_storess128_mask:
+  case X86::BI__builtin_ia32_storesd128_mask: {
+return EmitX86MaskedStore(*this, Ops, 16);
+  }
   case X86::BI__builtin_ia32_movdqa32store128_mask:
   case X86::BI__builtin_ia32_movdqa64store128_mask:
   case X86::BI__builtin_ia32_storeaps128_mask:
@@ -7422,6 +7426,10 @@ Value *CodeGenFunction::EmitX86BuiltinEx
   case X86::BI__builtin_ia32_loaddqudi512_mask:
 return EmitX86MaskedLoad(*this, Ops, 1);
 
+  case X86::BI__builtin_ia32_loadss128_mask:
+  case X86::BI__builtin_ia32_loadsd128_mask:
+return EmitX86MaskedLoad(*this, Ops, 16);
+
   case X86::BI__builtin_ia32_loadaps128_mask:
   case X86::BI__builtin_ia32_loadaps256_mask:
   case X86::BI__builtin_ia32_loadaps512_mask:

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=286229&r1=286228&r2=286229&view=diff
==
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Tue Nov  8 06:00:30 2016
@@ -516,6 +516,18

[PATCH] D26021: [X86][AVX512][Clang] Add support for mask_{move|store|load}_s{s/d} and int2mask/mask2int intrinsics.

2016-11-08 Thread Ayman Musa via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL286229: [X86][AVX512][Clang] Add support for 
mask_{move|store|load}_s{s/d} and… (authored by aymanmus).

Changed prior to commit:
  https://reviews.llvm.org/D26021?vs=77158&id=77170#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D26021

Files:
  cfe/trunk/include/clang/Basic/BuiltinsX86.def
  cfe/trunk/lib/CodeGen/CGBuiltin.cpp
  cfe/trunk/lib/Headers/avx512fintrin.h
  cfe/trunk/test/CodeGen/avx512f-builtins.c

Index: cfe/trunk/include/clang/Basic/BuiltinsX86.def
===
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def
@@ -1448,8 +1448,10 @@
 TARGET_BUILTIN(__builtin_ia32_fixupimmps256_mask, "V8fV8fV8fV8iIiUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_fixupimmps256_maskz, "V8fV8fV8fV8iIiUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loadapd128_mask, "V2dV2d*V2dUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_loadsd128_mask, "V8dV8d*V8dUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_loadapd256_mask, "V4dV4d*V4dUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loadaps128_mask, "V4fV4f*V4fUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_loadss128_mask, "V16fV16f*V16fUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_loadaps256_mask, "V8fV8f*V8fUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loaddqudi128_mask, "V2LLiV2LLi*V2LLiUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loaddqudi256_mask, "V4LLiV4LLi*V4LLiUc","","avx512vl")
@@ -1466,8 +1468,10 @@
 TARGET_BUILTIN(__builtin_ia32_storedquqi128_mask, "vV16c*V16cUs","","avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_storedquqi256_mask, "vV32c*V32cUi","","avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_storeapd128_mask, "vV2d*V2dUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_storesd128_mask, "vV8d*V8dUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_storeapd256_mask, "vV4d*V4dUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_storeaps128_mask, "vV4f*V4fUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_storess128_mask, "vV16f*V16fUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_storeaps256_mask, "vV8f*V8fUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_storedqudi128_mask, "vV2LLi*V2LLiUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_storedqudi256_mask, "vV4LLi*V4LLiUc","","avx512vl")
@@ -1790,8 +1794,6 @@
 TARGET_BUILTIN(__builtin_ia32_expandsf512_mask, "V16fV16fV16fUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_expandsi512_mask, "V16iV16iV16iUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_cvtps2pd512_mask, "V8dV8fV8dUcIi","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_movss_mask, "V4fV4fV4fV4fUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_movsd_mask, "V2dV2dV2dV2dUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_compressstoredf512_mask, "vV8d*V8dUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_compressstoredi512_mask, "vV8LLi*V8LLiUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_compressstoresf512_mask, "vV16f*V16fUs","","avx512f")
Index: cfe/trunk/test/CodeGen/avx512f-builtins.c
===
--- cfe/trunk/test/CodeGen/avx512f-builtins.c
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -O2 -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=O2
 
 #include <immintrin.h>
 
@@ -7995,34 +7996,143 @@
   return _mm512_setzero_pd();
 }
 
+__mmask16 test_mm512_int2mask(int __a)
+{
+  // O2-LABEL: test_mm512_int2mask
+  // O2: trunc i32 %__a to i16
+  return _mm512_int2mask(__a);
+}
+
+int test_mm512_mask2int(__mmask16 __a)
+{
+  // O2-LABEL: test_mm512_mask2int
+  // O2: zext i16 %__a to i32
+  return _mm512_mask2int(__a);
+}
+
 __m128 test_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
-  // CHECK-LABEL: @test_mm_mask_move_ss
-  // CHECK: @llvm.x86.avx512.mask.move.ss
+  // O2-LABEL: @test_mm_mask_move_ss
+  // O2: %[[M:.*]] = and i8 %__U, 1
+  // O2: %[[M2:.*]] = icmp ne i8 %[[M]], 0
+  // O2: %[[ELM1:.*]] = extractelement <4 x float> %__B, i32 0
+  // O2: %[[ELM2:.*]] = extractelement <4 x float> %__W, i32 0
+  // O2: %[[SEL:.*]] = select i1 %[[M2]], float %[[ELM1]], float %[[ELM2]]
+  // O2: %[[RES:.*]] = insertelement <4 x float> %__A, float %[[SEL]], i32 0
+  // O2: ret <4 x float> %[[RES]]
   return _mm_mask_move_ss ( __W,  __U,  __A,  __B);
 }
 
 __m128 test_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
 {
-  // CHECK-LABEL: @test_mm_maskz_move_ss
-  // CHECK: @llvm.x86.avx512.mask.move.ss
+  // O2-LABEL: @test_mm_maskz_move_ss
+  // O2: %[[M:.*]] = and i8 %__U, 1
+  // O2: %[[M2:.*]] = icmp ne i8 %[[M]], 0
+  // O2: %[[ELM1:.*]] = extractelement <4 x float> %__B, i32 0
+  // O2: %[[SEL:.*]] = select i1 %[[M2]], flo