[PATCH] D24961: [avx512] Add aliases to some missing avx512 intrinsics.
aymanmus created this revision. aymanmus added reviewers: m_zuckerman, igorb, delena. aymanmus added a subscriber: cfe-commits. - "//_mm512_cmp_pd_mask//" Intrinsics, where = {eq, le, lt, neq, nle, nlt, ord, unord}. - //_mm512_cvtepi32lo_pd, _mm512_mask_cvtepi32lo_pd, _mm512_cvtepu32lo_pd, _mm512_mask_cvtepu32lo_pd// - //_mm512_cvtpd_pslo, _mm512_mask_cvtpd_pslo, _mm512_cvtpslo_pd, _mm512_mask_cvtpslo_pd// https://reviews.llvm.org/D24961 Files: lib/Headers/avx512fintrin.h test/CodeGen/avx512f-builtins.c Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -3555,10 +3555,49 @@ #define _mm512_cmp_ps_mask(A, B, P) \ _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) - #define _mm512_mask_cmp_ps_mask(U, A, B, P) \ _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) +#define _mm512_cmpeq_ps_mask(A, B) \ +_mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ) +#define _mm512_mask_cmpeq_ps_mask(k, A, B) \ +_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ) + +#define _mm512_cmplt_ps_mask(A, B) \ +_mm512_cmp_ps_mask((A), (B), _CMP_LT_OS) +#define _mm512_mask_cmplt_ps_mask(k, A, B) \ +_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS) + +#define _mm512_cmple_ps_mask(A, B) \ +_mm512_cmp_ps_mask((A), (B), _CMP_LE_OS) +#define _mm512_mask_cmple_ps_mask(k, A, B) \ +_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS) + +#define _mm512_cmpunord_ps_mask(A, B) \ +_mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q) +#define _mm512_mask_cmpunord_ps_mask(k, A, B) \ +_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q) + +#define _mm512_cmpneq_ps_mask(A, B) \ +_mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ) +#define _mm512_mask_cmpneq_ps_mask(k, A, B) \ +_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ) + +#define _mm512_cmpnlt_ps_mask(A, B) \ +_mm512_cmp_ps_mask((A), (B), _CMP_NLT_US) +#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \ +_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US) + +#define _mm512_cmpnle_ps_mask(A, B) \ +_mm512_cmp_ps_mask((A), (B), _CMP_NLE_US) +#define _mm512_mask_cmpnle_ps_mask(k, A, B) \ +_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US) + +#define _mm512_cmpord_ps_mask(A, B) \ +_mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q) +#define _mm512_mask_cmpord_ps_mask(k, A, B) \ +_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q) + #define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \ (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(P), \ @@ -3571,10 +3610,49 @@ #define _mm512_cmp_pd_mask(A, B, P) \ _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) - #define _mm512_mask_cmp_pd_mask(U, A, B, P) \ _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) +#define _mm512_cmpeq_pd_mask(A, B) \ +_mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ) +#define _mm512_mask_cmpeq_pd_mask(k, A, B) \ +_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ) + +#define _mm512_cmplt_pd_mask(A, B) \ +_mm512_cmp_pd_mask((A), (B), _CMP_LT_OS) +#define _mm512_mask_cmplt_pd_mask(k, A, B) \ +_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS) + +#define _mm512_cmple_pd_mask(A, B) \ +_mm512_cmp_pd_mask((A), (B), _CMP_LE_OS) +#define _mm512_mask_cmple_pd_mask(k, A, B) \ +_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS) + +#define _mm512_cmpunord_pd_mask(A, B) \ +_mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q) +#define _mm512_mask_cmpunord_pd_mask(k, A, B) \ +_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q) + +#define _mm512_cmpneq_pd_mask(A, B) \ +_mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ) +#define _mm512_mask_cmpneq_pd_mask(k, A, B) \ +_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ) + +#define _mm512_cmpnlt_pd_mask(A, B) \ +_mm512_cmp_pd_mask((A), (B), _CMP_NLT_US) +#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \ +_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US) + +#define _mm512_cmpnle_pd_mask(A, B) \ +_mm512_cmp_pd_mask((A), (B), _CMP_NLE_US) +#define _mm512_mask_cmpnle_pd_mask(k, A, B) \ +_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US) + +#define _mm512_cmpord_pd_mask(A, B) \ +_mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q) +#define _mm512_mask_cmpord_pd_mask(k, A, B) \ +_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q) + /* Conversion */ #define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \ @@ -3703,6 +3781,18 @@ (__mmask8) __U); } +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_cvtepi32lo_pd(__m512i __A) +{ + return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A)); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A) +{ + return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A)); +} + static __inline
r282488 - [avx512] Add aliases to some missing avx512 intrinsics.
Author: aymanmus Date: Tue Sep 27 09:06:32 2016 New Revision: 282488 URL: http://llvm.org/viewvc/llvm-project?rev=282488&view=rev Log: [avx512] Add aliases to some missing avx512 intrinsics. Differential Revision:https: //reviews.llvm.org/D24961 Modified: cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=282488&r1=282487&r2=282488&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Tue Sep 27 09:06:32 2016 @@ -3555,10 +3555,49 @@ _mm512_mask_blend_epi32(__mmask16 __U, _ #define _mm512_cmp_ps_mask(A, B, P) \ _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) - #define _mm512_mask_cmp_ps_mask(U, A, B, P) \ _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) +#define _mm512_cmpeq_ps_mask(A, B) \ +_mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ) +#define _mm512_mask_cmpeq_ps_mask(k, A, B) \ +_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ) + +#define _mm512_cmplt_ps_mask(A, B) \ +_mm512_cmp_ps_mask((A), (B), _CMP_LT_OS) +#define _mm512_mask_cmplt_ps_mask(k, A, B) \ +_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS) + +#define _mm512_cmple_ps_mask(A, B) \ +_mm512_cmp_ps_mask((A), (B), _CMP_LE_OS) +#define _mm512_mask_cmple_ps_mask(k, A, B) \ +_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS) + +#define _mm512_cmpunord_ps_mask(A, B) \ +_mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q) +#define _mm512_mask_cmpunord_ps_mask(k, A, B) \ +_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q) + +#define _mm512_cmpneq_ps_mask(A, B) \ +_mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ) +#define _mm512_mask_cmpneq_ps_mask(k, A, B) \ +_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ) + +#define _mm512_cmpnlt_ps_mask(A, B) \ +_mm512_cmp_ps_mask((A), (B), _CMP_NLT_US) +#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \ +_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US) + +#define _mm512_cmpnle_ps_mask(A, B) \ +_mm512_cmp_ps_mask((A), (B), _CMP_NLE_US) +#define _mm512_mask_cmpnle_ps_mask(k, A, B) \ +_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US) + +#define _mm512_cmpord_ps_mask(A, B) \ +_mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q) +#define _mm512_mask_cmpord_ps_mask(k, A, B) \ +_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q) + #define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \ (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ (__v8df)(__m512d)(B), (int)(P), \ @@ -3571,10 +3610,49 @@ _mm512_mask_blend_epi32(__mmask16 __U, _ #define _mm512_cmp_pd_mask(A, B, P) \ _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) - #define _mm512_mask_cmp_pd_mask(U, A, B, P) \ _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) +#define _mm512_cmpeq_pd_mask(A, B) \ +_mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ) +#define _mm512_mask_cmpeq_pd_mask(k, A, B) \ +_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ) + +#define _mm512_cmplt_pd_mask(A, B) \ +_mm512_cmp_pd_mask((A), (B), _CMP_LT_OS) +#define _mm512_mask_cmplt_pd_mask(k, A, B) \ +_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS) + +#define _mm512_cmple_pd_mask(A, B) \ +_mm512_cmp_pd_mask((A), (B), _CMP_LE_OS) +#define _mm512_mask_cmple_pd_mask(k, A, B) \ +_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS) + +#define _mm512_cmpunord_pd_mask(A, B) \ +_mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q) +#define _mm512_mask_cmpunord_pd_mask(k, A, B) \ +_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q) + +#define _mm512_cmpneq_pd_mask(A, B) \ +_mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ) +#define _mm512_mask_cmpneq_pd_mask(k, A, B) \ +_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ) + +#define _mm512_cmpnlt_pd_mask(A, B) \ +_mm512_cmp_pd_mask((A), (B), _CMP_NLT_US) +#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \ +_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US) + +#define _mm512_cmpnle_pd_mask(A, B) \ +_mm512_cmp_pd_mask((A), (B), _CMP_NLE_US) +#define _mm512_mask_cmpnle_pd_mask(k, A, B) \ +_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US) + +#define _mm512_cmpord_pd_mask(A, B) \ +_mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q) +#define _mm512_mask_cmpord_pd_mask(k, A, B) \ +_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q) + /* Conversion */ #define _mm512_cvtt_roundps_epu32(A, R) __extension__ ({ \ @@ -3703,6 +3781,18 @@ _mm512_maskz_cvtepi32_pd (__mmask8 __U, (__mmask8) __U); } +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_cvtepi32lo_pd(__m512i __A) +{ + return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A)); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_cvtepi32lo_
r282492 - Update to commit r282488, fix the buildboot failure.
Author: aymanmus Date: Tue Sep 27 10:37:31 2016 New Revision: 282492 URL: http://llvm.org/viewvc/llvm-project?rev=282492&view=rev Log: Update to commit r282488, fix the buildboot failure. Modified: cfe/trunk/lib/Headers/avx512fintrin.h Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=282492&r1=282491&r2=282492&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Tue Sep 27 10:37:31 2016 @@ -9381,13 +9381,13 @@ _mm512_maskz_cvtps_pd (__mmask8 __U, __m } static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_cvtpslo_pd (__m512d __A) +_mm512_cvtpslo_pd (__m512 __A) { return (__m512) _mm512_cvtps_pd(_mm512_castps512_ps256(__A)); } static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512d __A) +_mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A) { return (__m512) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A)); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D26021: [X86][AVX512][Clang] Add support for mask_{move|store|load}_s{s/d} and int2mask/mask2int intrinsics.
aymanmus updated the summary for this revision. aymanmus updated this revision to Diff 77158. aymanmus added a comment. Fix spaces and indentations. https://reviews.llvm.org/D26021 Files: include/clang/Basic/BuiltinsX86.def lib/CodeGen/CGBuiltin.cpp lib/Headers/avx512fintrin.h test/CodeGen/avx512f-builtins.c Index: lib/Headers/avx512fintrin.h === --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -516,6 +516,18 @@ return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3); } +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm512_int2mask(int __a) +{ + return (__mmask16)__a; +} + +static __inline__ int __DEFAULT_FN_ATTRS +_mm512_mask2int(__mmask16 __a) +{ + return (int)__a; +} + /* Bitwise operators */ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_and_epi32(__m512i __a, __m512i __b) @@ -9107,35 +9119,96 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_movss_mask ((__v4sf) __A, (__v4sf) __B, - (__v4sf) __W, - (__mmask8) __U); + __m128 res = __A; + res[0] = (__U & 1) ? __B[0] : __W[0]; + return res; } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_movss_mask ((__v4sf) __A, (__v4sf) __B, - (__v4sf) - _mm_setzero_si128(), - (__mmask8) __U); + __m128 res = __A; + res[0] = (__U & 1) ? __B[0] : 0; + return res; } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_movsd_mask ((__v2df) __A, (__v2df) __B, - (__v2df) __W, - (__mmask8) __U); + __m128d res = __A; + res[0] = (__U & 1) ? __B[0] : __W[0]; + return res; } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_movsd_mask ((__v2df) __A, (__v2df) __B, - (__v2df) - _mm_setzero_pd (), - (__mmask8) __U); + __m128d res = __A; + res[0] = (__U & 1) ? __B[0] : 0; + return res; +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A) +{ + __builtin_ia32_storess128_mask ((__v16sf *)__W, +(__v16sf) _mm512_castps128_ps512(__A), +(__mmask16) __U & (__mmask16)1); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A) +{ + __builtin_ia32_storesd128_mask ((__v8df *)__W, +(__v8df) _mm512_castpd128_pd512(__A), +(__mmask8) __U & 1); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A) +{ + __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W, +(__v4sf) {0.0, 0.0, 0.0, 0.0}, +0, 4, 4, 4); + + return (__m128) __builtin_shufflevector( + __builtin_ia32_loadss128_mask ((__v16sf *) __A, + (__v16sf) _mm512_castps128_ps512(src), + (__mmask16) __U & 1), + _mm512_undefined_ps(), 0, 1, 2, 3); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_load_ss (__mmask8 __U, const float* __A) +{ + return (__m128) __builtin_shufflevector( + __builtin_ia32_loadss128_mask ((__v16sf *) __A, + (__v16sf) _mm512_setzero_ps(), + (__mmask16) __U & 1), + _mm512_undefined_ps(), 0, 1, 2, 3); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A) +{ + __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W, + (__v2df) {0.0, 0.0}, 0, 2); + + return (__m128d) __builtin_shufflevector( +__builtin_ia32_loadsd128_mask ((__v8df *) __A, + (__v8df) _mm512_castpd128_pd512(src), + (__mmask8) __U & 1), +_mm512_undefined_pd(), 0, 1); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_load_sd (__mmask8 __U, const double* __A) +{ + return (__m128d) __builtin_shufflevector( +__builtin_ia32_loadsd128_mask ((__v8df *) __A, + (__v8df) _mm512_setzero_pd(), + (__mmask8) __U & 1), +_mm512_undefined_pd(), 0, 1); } #define _mm512_shuffle_epi32(A, I) __extension__ ({ \ Index: lib/CodeGen/CGBuiltin.cp
r286229 - [X86][AVX512][Clang] Add support for mask_{move|store|load}_s{s/d} and int2mask/mask2int intrinsics.
Author: aymanmus Date: Tue Nov 8 06:00:30 2016 New Revision: 286229 URL: http://llvm.org/viewvc/llvm-project?rev=286229&view=rev Log: [X86][AVX512][Clang] Add support for mask_{move|store|load}_s{s/d} and int2mask/mask2int intrinsics. Differential Revision: https://reviews.llvm.org/D26021 Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=286229&r1=286228&r2=286229&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Tue Nov 8 06:00:30 2016 @@ -1448,8 +1448,10 @@ TARGET_BUILTIN(__builtin_ia32_fixupimmps TARGET_BUILTIN(__builtin_ia32_fixupimmps256_mask, "V8fV8fV8fV8iIiUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_fixupimmps256_maskz, "V8fV8fV8fV8iIiUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_loadapd128_mask, "V2dV2d*V2dUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_loadsd128_mask, "V8dV8d*V8dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_loadapd256_mask, "V4dV4d*V4dUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_loadaps128_mask, "V4fV4f*V4fUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_loadss128_mask, "V16fV16f*V16fUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_loadaps256_mask, "V8fV8f*V8fUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_loaddqudi128_mask, "V2LLiV2LLi*V2LLiUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_loaddqudi256_mask, "V4LLiV4LLi*V4LLiUc","","avx512vl") @@ -1466,8 +1468,10 @@ TARGET_BUILTIN(__builtin_ia32_storedquhi TARGET_BUILTIN(__builtin_ia32_storedquqi128_mask, "vV16c*V16cUs","","avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_storedquqi256_mask, "vV32c*V32cUi","","avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_storeapd128_mask, "vV2d*V2dUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_storesd128_mask, "vV8d*V8dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_storeapd256_mask, "vV4d*V4dUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_storeaps128_mask, "vV4f*V4fUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_storess128_mask, "vV16f*V16fUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_storeaps256_mask, "vV8f*V8fUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_storedqudi128_mask, "vV2LLi*V2LLiUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_storedqudi256_mask, "vV4LLi*V4LLiUc","","avx512vl") @@ -1790,8 +1794,6 @@ TARGET_BUILTIN(__builtin_ia32_expandload TARGET_BUILTIN(__builtin_ia32_expandsf512_mask, "V16fV16fV16fUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_expandsi512_mask, "V16iV16iV16iUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_cvtps2pd512_mask, "V8dV8fV8dUcIi","","avx512f") -TARGET_BUILTIN(__builtin_ia32_movss_mask, "V4fV4fV4fV4fUc","","avx512f") -TARGET_BUILTIN(__builtin_ia32_movsd_mask, "V2dV2dV2dV2dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_compressstoredf512_mask, "vV8d*V8dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_compressstoredi512_mask, "vV8LLi*V8LLiUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_compressstoresf512_mask, "vV16f*V16fUs","","avx512f") Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=286229&r1=286228&r2=286229&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Tue Nov 8 06:00:30 2016 @@ -7386,6 +7386,10 @@ Value *CodeGenFunction::EmitX86BuiltinEx case X86::BI__builtin_ia32_storeups512_mask: return EmitX86MaskedStore(*this, Ops, 1); + case X86::BI__builtin_ia32_storess128_mask: + case X86::BI__builtin_ia32_storesd128_mask: { +return EmitX86MaskedStore(*this, Ops, 16); + } case X86::BI__builtin_ia32_movdqa32store128_mask: case X86::BI__builtin_ia32_movdqa64store128_mask: case X86::BI__builtin_ia32_storeaps128_mask: @@ -7422,6 +7426,10 @@ Value *CodeGenFunction::EmitX86BuiltinEx case X86::BI__builtin_ia32_loaddqudi512_mask: return EmitX86MaskedLoad(*this, Ops, 1); + case X86::BI__builtin_ia32_loadss128_mask: + case X86::BI__builtin_ia32_loadsd128_mask: +return EmitX86MaskedLoad(*this, Ops, 16); + case X86::BI__builtin_ia32_loadaps128_mask: case X86::BI__builtin_ia32_loadaps256_mask: case X86::BI__builtin_ia32_loadaps512_mask: Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=286229&r1=286228&r2=286229&view=diff == --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Tue Nov 8 06:00:30 2016 @@ -516,6 +516,18
[PATCH] D26021: [X86][AVX512][Clang] Add support for mask_{move|store|load}_s{s/d} and int2mask/mask2int intrinsics.
This revision was automatically updated to reflect the committed changes. Closed by commit rL286229: [X86][AVX512][Clang] Add support for mask_{move|store|load}_s{s/d} and… (authored by aymanmus). Changed prior to commit: https://reviews.llvm.org/D26021?vs=77158&id=77170#toc Repository: rL LLVM https://reviews.llvm.org/D26021 Files: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/test/CodeGen/avx512f-builtins.c Index: cfe/trunk/include/clang/Basic/BuiltinsX86.def === --- cfe/trunk/include/clang/Basic/BuiltinsX86.def +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def @@ -1448,8 +1448,10 @@ TARGET_BUILTIN(__builtin_ia32_fixupimmps256_mask, "V8fV8fV8fV8iIiUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_fixupimmps256_maskz, "V8fV8fV8fV8iIiUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_loadapd128_mask, "V2dV2d*V2dUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_loadsd128_mask, "V8dV8d*V8dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_loadapd256_mask, "V4dV4d*V4dUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_loadaps128_mask, "V4fV4f*V4fUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_loadss128_mask, "V16fV16f*V16fUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_loadaps256_mask, "V8fV8f*V8fUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_loaddqudi128_mask, "V2LLiV2LLi*V2LLiUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_loaddqudi256_mask, "V4LLiV4LLi*V4LLiUc","","avx512vl") @@ -1466,8 +1468,10 @@ TARGET_BUILTIN(__builtin_ia32_storedquqi128_mask, "vV16c*V16cUs","","avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_storedquqi256_mask, "vV32c*V32cUi","","avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_storeapd128_mask, "vV2d*V2dUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_storesd128_mask, "vV8d*V8dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_storeapd256_mask, "vV4d*V4dUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_storeaps128_mask, "vV4f*V4fUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_storess128_mask, "vV16f*V16fUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_storeaps256_mask, "vV8f*V8fUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_storedqudi128_mask, "vV2LLi*V2LLiUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_storedqudi256_mask, "vV4LLi*V4LLiUc","","avx512vl") @@ -1790,8 +1794,6 @@ TARGET_BUILTIN(__builtin_ia32_expandsf512_mask, "V16fV16fV16fUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_expandsi512_mask, "V16iV16iV16iUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_cvtps2pd512_mask, "V8dV8fV8dUcIi","","avx512f") -TARGET_BUILTIN(__builtin_ia32_movss_mask, "V4fV4fV4fV4fUc","","avx512f") -TARGET_BUILTIN(__builtin_ia32_movsd_mask, "V2dV2dV2dV2dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_compressstoredf512_mask, "vV8d*V8dUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_compressstoredi512_mask, "vV8LLi*V8LLiUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_compressstoresf512_mask, "vV16f*V16fUs","","avx512f") Index: cfe/trunk/test/CodeGen/avx512f-builtins.c === --- cfe/trunk/test/CodeGen/avx512f-builtins.c +++ cfe/trunk/test/CodeGen/avx512f-builtins.c @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -o - -Wall -Werror | FileCheck %s +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -O2 -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=O2 #include @@ -7995,34 +7996,143 @@ return _mm512_setzero_pd(); } +__mmask16 test_mm512_int2mask(int __a) +{ + // O2-LABEL: test_mm512_int2mask + // O2: trunc i32 %__a to i16 + return _mm512_int2mask(__a); +} + +int test_mm512_mask2int(__mmask16 __a) +{ + // O2-LABEL: test_mm512_mask2int + // O2: zext i16 %__a to i32 + return _mm512_mask2int(__a); +} + __m128 test_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_mask_move_ss - // CHECK: @llvm.x86.avx512.mask.move.ss + // O2-LABEL: @test_mm_mask_move_ss + // O2: %[[M:.*]] = and i8 %__U, 1 + // O2: %[[M2:.*]] = icmp ne i8 %[[M]], 0 + // O2: %[[ELM1:.*]] = extractelement <4 x float> %__B, i32 0 + // O2: %[[ELM2:.*]] = extractelement <4 x float> %__W, i32 0 + // O2: %[[SEL:.*]] = select i1 %[[M2]], float %[[ELM1]], float %[[ELM2]] + // O2: %[[RES:.*]] = insertelement <4 x float> %__A, float %[[SEL]], i32 0 + // O2: ret <4 x float> %[[RES]] return _mm_mask_move_ss ( __W, __U, __A, __B); } __m128 test_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) { - // CHECK-LABEL: @test_mm_maskz_move_ss - // CHECK: @llvm.x86.avx512.mask.move.ss + // O2-LABEL: @test_mm_maskz_move_ss + // O2: %[[M:.*]] = and i8 %__U, 1 + // O2: %[[M2:.*]] = icmp ne i8 %[[M]], 0 + // O2: %[[ELM1:.*]] = extractelement <4 x float> %__B, i32 0 + // O2: %[[SEL:.*]] = select i1 %[[M2]], flo