Author: ctopper Date: Wed Aug 31 00:38:58 2016 New Revision: 280197 URL: http://llvm.org/viewvc/llvm-project?rev=280197&view=rev Log: [AVX-512] Implement masked floating point logical operations with native IR and remove the builtins.
Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512dqintrin.h cfe/trunk/lib/Headers/avx512vldqintrin.h cfe/trunk/test/CodeGen/avx512dq-builtins.c cfe/trunk/test/CodeGen/avx512vldq-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=280197&r1=280196&r2=280197&view=diff ============================================================================== --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed Aug 31 00:38:58 2016 @@ -1131,32 +1131,8 @@ TARGET_BUILTIN(__builtin_ia32_psubd512_m TARGET_BUILTIN(__builtin_ia32_pmulld512_mask, "V16iV16iV16iV16iUs", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_pmullq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_xorpd512_mask, "V8dV8dV8dV8dUc", "", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_xorps512_mask, "V16fV16fV16fV16fUs", "", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_orpd512_mask, "V8dV8dV8dV8dUc", "", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_orps512_mask, "V16fV16fV16fV16fUs", "", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_andpd512_mask, "V8dV8dV8dV8dUc", "", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_andps512_mask, "V16fV16fV16fV16fUs", "", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_andnpd512_mask, "V8dV8dV8dV8dUc", "", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_andnps512_mask, "V16fV16fV16fV16fUs", "", "avx512dq") TARGET_BUILTIN(__builtin_ia32_pmullq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_pmullq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_andnpd256_mask, "V4dV4dV4dV4dUc", "", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_andnpd128_mask, "V2dV2dV2dV2dUc", "", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_andnps256_mask, "V8fV8fV8fV8fUc", "", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_andnps128_mask, "V4fV4fV4fV4fUc", "", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_andpd256_mask, "V4dV4dV4dV4dUc", "", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_andpd128_mask, "V2dV2dV2dV2dUc", "", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_andps256_mask, "V8fV8fV8fV8fUc", "", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_andps128_mask, "V4fV4fV4fV4fUc", "", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_xorpd256_mask, "V4dV4dV4dV4dUc", "", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_xorpd128_mask, "V2dV2dV2dV2dUc", "", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_xorps256_mask, "V8fV8fV8fV8fUc", "", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_xorps128_mask, "V4fV4fV4fV4fUc", "", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_orpd256_mask, "V4dV4dV4dV4dUc", "", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_orpd128_mask, "V2dV2dV2dV2dUc", "", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_orps256_mask, "V8fV8fV8fV8fUc", "", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_orps128_mask, "V4fV4fV4fV4fUc", "", "avx512vl,avx512dq") TARGET_BUILTIN(__builtin_ia32_pabsb512_mask, "V64cV64cV64cULLi", "", "avx512bw") TARGET_BUILTIN(__builtin_ia32_pabsw512_mask, "V32sV32sV32sUi", "", "avx512bw") Modified: cfe/trunk/lib/Headers/avx512dqintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512dqintrin.h?rev=280197&r1=280196&r2=280197&view=diff ============================================================================== --- cfe/trunk/lib/Headers/avx512dqintrin.h (original) +++ cfe/trunk/lib/Headers/avx512dqintrin.h Wed Aug 31 00:38:58 2016 @@ -54,179 +54,155 @@ _mm512_maskz_mullo_epi64 (__mmask8 __U, } static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_xor_pd (__m512d __A, __m512d __B) { - return (__m512d) ((__v8du) __A ^ (__v8du) __B); +_mm512_xor_pd(__m512d __A, __m512d __B) { + return (__m512d)((__v8du)__A ^ (__v8du)__B); } static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) __W, - (__mmask8) __U); +_mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, + (__v8df)_mm512_xor_pd(__A, __B), + (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) { - return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) __U); +_mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, + (__v8df)_mm512_xor_pd(__A, __B), + (__v8df)_mm512_setzero_pd()); } static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_xor_ps (__m512 __A, __m512 __B) { - return (__m512) ((__v16su) __A ^ (__v16su) __B); + return (__m512)((__v16su)__A ^ (__v16su)__B); } static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) __W, - (__mmask16) __U); +_mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { + return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, + (__v16sf)_mm512_xor_ps(__A, __B), + (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) { - return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) __U); +_mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) { + return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, + (__v16sf)_mm512_xor_ps(__A, __B), + (__v16sf)_mm512_setzero_ps()); } static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_or_pd (__m512d __A, __m512d __B) { - return (__m512d) ((__v8du) __A | (__v8du) __B); +_mm512_or_pd(__m512d __A, __m512d __B) { + return (__m512d)((__v8du)__A | (__v8du)__B); } static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) __W, - (__mmask8) __U); +_mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, + (__v8df)_mm512_or_pd(__A, __B), + (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) { - return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) __U); +_mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, + (__v8df)_mm512_or_pd(__A, __B), + (__v8df)_mm512_setzero_pd()); } static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_or_ps (__m512 __A, __m512 __B) { - return (__m512) ((__v16su) __A | (__v16su) __B); +_mm512_or_ps(__m512 __A, __m512 __B) { + return (__m512)((__v16su)__A | (__v16su)__B); } static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) __W, - (__mmask16) __U); +_mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { + return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, + (__v16sf)_mm512_or_ps(__A, __B), + (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) { - return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) __U); +_mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) { + return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, + (__v16sf)_mm512_or_ps(__A, __B), + (__v16sf)_mm512_setzero_ps()); } static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_and_pd (__m512d __A, __m512d __B) { - return (__m512d) ((__v8du) __A & (__v8du) __B); +_mm512_and_pd(__m512d __A, __m512d __B) { + return (__m512d)((__v8du)__A & (__v8du)__B); } static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) __W, - (__mmask8) __U); +_mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, + (__v8df)_mm512_and_pd(__A, __B), + (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) { - return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) __U); +_mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, + (__v8df)_mm512_and_pd(__A, __B), + (__v8df)_mm512_setzero_pd()); } static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_and_ps (__m512 __A, __m512 __B) { - return (__m512) ((__v16su) __A & (__v16su) __B); +_mm512_and_ps(__m512 __A, __m512 __B) { + return (__m512)((__v16su)__A & (__v16su)__B); } static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) __W, - (__mmask16) __U); +_mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { + return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, + (__v16sf)_mm512_and_ps(__A, __B), + (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) { - return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) __U); +_mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) { + return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, + (__v16sf)_mm512_and_ps(__A, __B), + (__v16sf)_mm512_setzero_ps()); } static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_andnot_pd (__m512d __A, __m512d __B) { +_mm512_andnot_pd(__m512d __A, __m512d __B) { return (__m512d)(~(__v8du)__A & (__v8du)__B); } static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { - return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) __W, - (__mmask8) __U); +_mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, + (__v8df)_mm512_andnot_pd(__A, __B), + (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) { - return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, - (__v8df) __B, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) __U); +_mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, + (__v8df)_mm512_andnot_pd(__A, __B), + (__v8df)_mm512_setzero_pd()); } static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_andnot_ps (__m512 __A, __m512 __B) { +_mm512_andnot_ps(__m512 __A, __m512 __B) { return (__m512)(~(__v16su)__A & (__v16su)__B); } static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { - return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) __W, - (__mmask16) __U); +_mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { + return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, + (__v16sf)_mm512_andnot_ps(__A, __B), + (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) { - return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, - (__v16sf) __B, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) __U); +_mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) { + return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, + (__v16sf)_mm512_andnot_ps(__A, __B), + (__v16sf)_mm512_setzero_ps()); } static __inline__ __m512i __DEFAULT_FN_ATTRS Modified: cfe/trunk/lib/Headers/avx512vldqintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vldqintrin.h?rev=280197&r1=280196&r2=280197&view=diff ============================================================================== --- cfe/trunk/lib/Headers/avx512vldqintrin.h (original) +++ cfe/trunk/lib/Headers/avx512vldqintrin.h Wed Aug 31 00:38:58 2016 @@ -76,276 +76,227 @@ _mm_maskz_mullo_epi64 (__mmask8 __U, __m } static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { - return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, - (__v4df) __B, - (__v4df) __W, - (__mmask8) __U); +_mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { + return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, + (__v4df)_mm256_andnot_pd(__A, __B), + (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) { - return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, - (__v4df) __B, - (__v4df) - _mm256_setzero_pd (), - (__mmask8) __U); +_mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) { + return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, + (__v4df)_mm256_andnot_pd(__A, __B), + (__v4df)_mm256_setzero_pd()); } static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, - (__v2df) __B, - (__v2df) __W, - (__mmask8) __U); +_mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { + return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, + (__v2df)_mm_andnot_pd(__A, __B), + (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, - (__v2df) __B, - (__v2df) - _mm_setzero_pd (), - (__mmask8) __U); +_mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) { + return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, + (__v2df)_mm_andnot_pd(__A, __B), + (__v2df)_mm_setzero_pd()); } static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { - return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, - (__v8sf) __B, - (__v8sf) __W, - (__mmask8) __U); +_mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { + return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, + (__v8sf)_mm256_andnot_ps(__A, __B), + (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) { - return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, - (__v8sf) __B, - (__v8sf) - _mm256_setzero_ps (), - (__mmask8) __U); +_mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) { + return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, + (__v8sf)_mm256_andnot_ps(__A, __B), + (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) __W, - (__mmask8) __U); +_mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, + (__v4sf)_mm_andnot_ps(__A, __B), + (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) - _mm_setzero_ps (), - (__mmask8) __U); +_mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) { + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, + (__v4sf)_mm_andnot_ps(__A, __B), + (__v4sf)_mm_setzero_ps()); } static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { - return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, - (__v4df) __B, - (__v4df) __W, - (__mmask8) __U); +_mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { + return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, + (__v4df)_mm256_and_pd(__A, __B), + (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) { - return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, - (__v4df) __B, - (__v4df) - _mm256_setzero_pd (), - (__mmask8) __U); +_mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) { + return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, + (__v4df)_mm256_and_pd(__A, __B), + (__v4df)_mm256_setzero_pd()); } static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, - (__v2df) __B, - (__v2df) __W, - (__mmask8) __U); +_mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { + return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, + (__v2df)_mm_and_pd(__A, __B), + (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, - (__v2df) __B, - (__v2df) - _mm_setzero_pd (), - (__mmask8) __U); +_mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) { + return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, + (__v2df)_mm_and_pd(__A, __B), + (__v2df)_mm_setzero_pd()); } static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { - return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, - (__v8sf) __B, - (__v8sf) __W, - (__mmask8) __U); +_mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { + return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, + (__v8sf)_mm256_and_ps(__A, __B), + (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) { - return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, - (__v8sf) __B, - (__v8sf) - _mm256_setzero_ps (), - (__mmask8) __U); +_mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) { + return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, + (__v8sf)_mm256_and_ps(__A, __B), + (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) __W, - (__mmask8) __U); +_mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, + (__v4sf)_mm_and_ps(__A, __B), + (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) - _mm_setzero_ps (), - (__mmask8) __U); +_mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) { + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, + (__v4sf)_mm_and_ps(__A, __B), + (__v4sf)_mm_setzero_ps()); } static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A, - __m256d __B) { - return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, - (__v4df) __B, - (__v4df) __W, - (__mmask8) __U); +_mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { + return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, + (__v4df)_mm256_xor_pd(__A, __B), + (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) { - return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, - (__v4df) __B, - (__v4df) - _mm256_setzero_pd (), - (__mmask8) __U); +_mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) { + return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, + (__v4df)_mm256_xor_pd(__A, __B), + (__v4df)_mm256_setzero_pd()); } static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, - (__v2df) __B, - (__v2df) __W, - (__mmask8) __U); +_mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { + return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, + (__v2df)_mm_xor_pd(__A, __B), + (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, - (__v2df) __B, - (__v2df) - _mm_setzero_pd (), - (__mmask8) __U); + return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, + (__v2df)_mm_xor_pd(__A, __B), + (__v2df)_mm_setzero_pd()); } static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { - return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, - (__v8sf) __B, - (__v8sf) __W, - (__mmask8) __U); +_mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { + return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, + (__v8sf)_mm256_xor_ps(__A, __B), + (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) { - return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, - (__v8sf) __B, - (__v8sf) - _mm256_setzero_ps (), - (__mmask8) __U); +_mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) { + return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, + (__v8sf)_mm256_xor_ps(__A, __B), + (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) __W, - (__mmask8) __U); +_mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, + (__v4sf)_mm_xor_ps(__A, __B), + (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) - _mm_setzero_ps (), - (__mmask8) __U); +_mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) { + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, + (__v4sf)_mm_xor_ps(__A, __B), + (__v4sf)_mm_setzero_ps()); } static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { - return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, - (__v4df) __B, - (__v4df) __W, - (__mmask8) __U); +_mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { + return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, + (__v4df)_mm256_or_pd(__A, __B), + (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS -_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) { - return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, - (__v4df) __B, - (__v4df) - _mm256_setzero_pd (), - (__mmask8) __U); +_mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) { + return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, + (__v4df)_mm256_or_pd(__A, __B), + (__v4df)_mm256_setzero_pd()); } static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, - (__v2df) __B, - (__v2df) __W, - (__mmask8) __U); +_mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { + return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, + (__v2df)_mm_or_pd(__A, __B), + (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS -_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, - (__v2df) __B, - (__v2df) - _mm_setzero_pd (), - (__mmask8) __U); +_mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) { + return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, + (__v2df)_mm_or_pd(__A, __B), + (__v2df)_mm_setzero_pd()); } static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { - return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, - (__v8sf) __B, - (__v8sf) __W, - (__mmask8) __U); +_mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { + return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, + (__v8sf)_mm256_or_ps(__A, __B), + (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS -_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) { - return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, - (__v8sf) __B, - (__v8sf) - _mm256_setzero_ps (), - (__mmask8) __U); +_mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) { + return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, + (__v8sf)_mm256_or_ps(__A, __B), + (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) __W, - (__mmask8) __U); +_mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, + (__v4sf)_mm_or_ps(__A, __B), + (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, - (__v4sf) __B, - (__v4sf) - _mm_setzero_ps (), - (__mmask8) __U); +_mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) { + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, + (__v4sf)_mm_or_ps(__A, __B), + (__v4sf)_mm_setzero_ps()); } static __inline__ __m128i __DEFAULT_FN_ATTRS Modified: cfe/trunk/test/CodeGen/avx512dq-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512dq-builtins.c?rev=280197&r1=280196&r2=280197&view=diff ============================================================================== --- cfe/trunk/test/CodeGen/avx512dq-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512dq-builtins.c Wed Aug 31 00:38:58 2016 @@ -31,13 +31,17 @@ __m512d test_mm512_xor_pd (__m512d __A, __m512d test_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_mask_xor_pd - // CHECK: @llvm.x86.avx512.mask.xor.pd.512 + // CHECK: xor <8 x i64> + // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1> + // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}} return (__m512d) _mm512_mask_xor_pd(__W, __U, __A, __B); } __m512d test_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_maskz_xor_pd - // CHECK: @llvm.x86.avx512.mask.xor.pd.512 + // CHECK: xor <8 x i64> + // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1> + // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}} return (__m512d) _mm512_maskz_xor_pd(__U, __A, __B); } @@ -49,13 +53,17 @@ __m512 test_mm512_xor_ps (__m512 __A, __ __m512 test_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { // CHECK-LABEL: @test_mm512_mask_xor_ps - // CHECK: @llvm.x86.avx512.mask.xor.ps.512 + // CHECK: xor <16 x i32> + // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}} return (__m512) _mm512_mask_xor_ps(__W, __U, __A, __B); } __m512 test_mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) { // CHECK-LABEL: @test_mm512_maskz_xor_ps - // CHECK: @llvm.x86.avx512.mask.xor.ps.512 + // CHECK: xor <16 x i32> + // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}} return (__m512) _mm512_maskz_xor_ps(__U, __A, __B); } @@ -67,13 +75,17 @@ __m512d test_mm512_or_pd (__m512d __A, _ __m512d test_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_mask_or_pd - // CHECK: @llvm.x86.avx512.mask.or.pd.512 + // CHECK: or <8 x i64> + // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1> + // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}} return (__m512d) _mm512_mask_or_pd(__W, __U, __A, __B); } __m512d test_mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_maskz_or_pd - // CHECK: @llvm.x86.avx512.mask.or.pd.512 + // CHECK: or <8 x i64> + // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1> + // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}} return (__m512d) _mm512_maskz_or_pd(__U, __A, __B); } @@ -85,13 +97,17 @@ __m512 test_mm512_or_ps (__m512 __A, __m __m512 test_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { // CHECK-LABEL: @test_mm512_mask_or_ps - // CHECK: @llvm.x86.avx512.mask.or.ps.512 + // CHECK: or <16 x i32> + // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}} return (__m512) _mm512_mask_or_ps(__W, __U, __A, __B); } __m512 test_mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) { // CHECK-LABEL: @test_mm512_maskz_or_ps - // CHECK: @llvm.x86.avx512.mask.or.ps.512 + // CHECK: or <16 x i32> + // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}} return (__m512) _mm512_maskz_or_ps(__U, __A, __B); } @@ -103,13 +119,17 @@ __m512d test_mm512_and_pd (__m512d __A, __m512d test_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_mask_and_pd - // CHECK: @llvm.x86.avx512.mask.and.pd.512 + // CHECK: and <8 x i64> + // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1> + // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}} return (__m512d) _mm512_mask_and_pd(__W, __U, __A, __B); } __m512d test_mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_maskz_and_pd - // CHECK: @llvm.x86.avx512.mask.and.pd.512 + // CHECK: and <8 x i64> + // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1> + // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}} return (__m512d) _mm512_maskz_and_pd(__U, __A, __B); } @@ -121,13 +141,17 @@ __m512 test_mm512_and_ps (__m512 __A, __ __m512 test_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { // CHECK-LABEL: @test_mm512_mask_and_ps - // CHECK: @llvm.x86.avx512.mask.and.ps.512 + // CHECK: and <16 x i32> + // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}} return (__m512) _mm512_mask_and_ps(__W, __U, __A, __B); } __m512 test_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) { // CHECK-LABEL: @test_mm512_maskz_and_ps - // CHECK: @llvm.x86.avx512.mask.and.ps.512 + // CHECK: and <16 x i32> + // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1> + // CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}} return (__m512) _mm512_maskz_and_ps(__U, __A, __B); } @@ -140,13 +164,17 @@ __m512d test_mm512_andnot_pd (__m512d __ __m512d test_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_mask_andnot_pd - // CHECK: @llvm.x86.avx512.mask.andn.pd.512 + // CHECK: xor <8 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1> + // CHECK: and <8 x i64> %{{.*}}, %{{.*}} + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return (__m512d) _mm512_mask_andnot_pd(__W, __U, __A, __B); } __m512d test_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) { // CHECK-LABEL: @test_mm512_maskz_andnot_pd - // CHECK: @llvm.x86.avx512.mask.andn.pd.512 + // CHECK: xor <8 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1> + // CHECK: and <8 x i64> %{{.*}}, %{{.*}} + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} return (__m512d) _mm512_maskz_andnot_pd(__U, __A, __B); } @@ -159,13 +187,17 @@ __m512 test_mm512_andnot_ps (__m512 __A, __m512 test_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { // CHECK-LABEL: @test_mm512_mask_andnot_ps - // CHECK: @llvm.x86.avx512.mask.andn.ps.512 + // CHECK: xor <16 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> + // CHECK: and <16 x i32> %{{.*}}, %{{.*}} + // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return (__m512) _mm512_mask_andnot_ps(__W, __U, __A, __B); } __m512 test_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) { // CHECK-LABEL: @test_mm512_maskz_andnot_ps - // CHECK: @llvm.x86.avx512.mask.andn.ps.512 + // CHECK: xor <16 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> + // CHECK: and <16 x i32> %{{.*}}, %{{.*}} + // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return (__m512) _mm512_maskz_andnot_ps(__U, __A, __B); } Modified: cfe/trunk/test/CodeGen/avx512vldq-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vldq-builtins.c?rev=280197&r1=280196&r2=280197&view=diff ============================================================================== --- cfe/trunk/test/CodeGen/avx512vldq-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512vldq-builtins.c Wed Aug 31 00:38:58 2016 @@ -43,193 +43,233 @@ __m128i test_mm_maskz_mullo_epi64 (__mma __m256d test_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { // CHECK-LABEL: @test_mm256_mask_andnot_pd - // CHECK: @llvm.x86.avx512.mask.andn.pd.256 + // CHECK: xor <4 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1> + // CHECK: and <4 x i64> %{{.*}}, %{{.*}} + // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return (__m256d) _mm256_mask_andnot_pd ( __W, __U, __A, __B); } __m256d test_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) { // CHECK-LABEL: @test_mm256_maskz_andnot_pd - // CHECK: @llvm.x86.avx512.mask.andn.pd.256 + // CHECK: xor <4 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1> + // CHECK: and <4 x i64> %{{.*}}, %{{.*}} + // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return (__m256d) _mm256_maskz_andnot_pd (__U, __A, __B); } __m128d test_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: @test_mm_mask_andnot_pd - // CHECK: @llvm.x86.avx512.mask.andn.pd.128 + // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1> + // CHECK: and <2 x i64> %{{.*}}, %{{.*}} + // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return (__m128d) _mm_mask_andnot_pd ( __W, __U, __A, __B); } __m128d test_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: @test_mm_maskz_andnot_pd - // CHECK: @llvm.x86.avx512.mask.andn.pd.128 + // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1> + // CHECK: and <2 x i64> %{{.*}}, %{{.*}} + // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return (__m128d) _mm_maskz_andnot_pd (__U, __A, __B); } __m256 test_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { // CHECK-LABEL: @test_mm256_mask_andnot_ps - // CHECK: @llvm.x86.avx512.mask.andn.ps.256 + // CHECK: xor <8 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> + // CHECK: and <8 x i32> %{{.*}}, %{{.*}} + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return (__m256) _mm256_mask_andnot_ps ( __W, __U, __A, __B); } __m256 test_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) { // CHECK-LABEL: @test_mm256_maskz_andnot_ps - // CHECK: @llvm.x86.avx512.mask.andn.ps.256 + // CHECK: xor <8 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> + // CHECK: and <8 x i32> %{{.*}}, %{{.*}} + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return (__m256) _mm256_maskz_andnot_ps (__U, __A, __B); } __m128 test_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { // CHECK-LABEL: @test_mm_mask_andnot_ps - // CHECK: @llvm.x86.avx512.mask.andn.ps.128 + // CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1> + // CHECK: and <4 x i32> %{{.*}}, %{{.*}} + // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return (__m128) _mm_mask_andnot_ps ( __W, __U, __A, __B); } __m128 test_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) { // CHECK-LABEL: @test_mm_maskz_andnot_ps - // CHECK: @llvm.x86.avx512.mask.andn.ps.128 + // CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1> + // CHECK: and <4 x i32> %{{.*}}, %{{.*}} + // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return (__m128) _mm_maskz_andnot_ps (__U, __A, __B); } __m256d test_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { // CHECK-LABEL: @test_mm256_mask_and_pd - // CHECK: @llvm.x86.avx512.mask.and.pd.256 + // CHECK: and <4 x i64> %{{.*}}, %{{.*}} + // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return (__m256d) _mm256_mask_and_pd ( __W, __U, __A, __B); } __m256d test_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) { // CHECK-LABEL: @test_mm256_maskz_and_pd - // CHECK: @llvm.x86.avx512.mask.and.pd.256 + // CHECK: and <4 x i64> %{{.*}}, %{{.*}} + // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return (__m256d) _mm256_maskz_and_pd (__U, __A, __B); } __m128d test_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: @test_mm_mask_and_pd - // CHECK: @llvm.x86.avx512.mask.and.pd.128 + // CHECK: and <2 x i64> %{{.*}}, %{{.*}} + // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return (__m128d) _mm_mask_and_pd ( __W, __U, __A, __B); } __m128d test_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: @test_mm_maskz_and_pd - // CHECK: @llvm.x86.avx512.mask.and.pd.128 + // CHECK: and <2 x i64> %{{.*}}, %{{.*}} + // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return (__m128d) _mm_maskz_and_pd (__U, __A, __B); } __m256 test_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { // CHECK-LABEL: @test_mm256_mask_and_ps - // CHECK: @llvm.x86.avx512.mask.and.ps.256 + // CHECK: and <8 x i32> %{{.*}}, %{{.*}} + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return (__m256) _mm256_mask_and_ps ( __W, __U, __A, __B); } __m256 test_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) { // CHECK-LABEL: @test_mm256_maskz_and_ps - // CHECK: @llvm.x86.avx512.mask.and.ps.256 + // CHECK: and <8 x i32> %{{.*}}, %{{.*}} + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return (__m256) _mm256_maskz_and_ps (__U, __A, __B); } __m128 test_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { // CHECK-LABEL: @test_mm_mask_and_ps - // CHECK: @llvm.x86.avx512.mask.and.ps.128 + // CHECK: and <4 x i32> %{{.*}}, %{{.*}} + // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return (__m128) _mm_mask_and_ps ( __W, __U, __A, __B); } __m128 test_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) { // CHECK-LABEL: @test_mm_maskz_and_ps - // CHECK: @llvm.x86.avx512.mask.and.ps.128 + // CHECK: and <4 x i32> %{{.*}}, %{{.*}} + // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return (__m128) _mm_maskz_and_ps (__U, __A, __B); } __m256d test_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { // CHECK-LABEL: @test_mm256_mask_xor_pd - // CHECK: @llvm.x86.avx512.mask.xor.pd.256 + // CHECK: xor <4 x i64> %{{.*}}, %{{.*}} + // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return (__m256d) _mm256_mask_xor_pd ( __W, __U, __A, __B); } __m256d test_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) { // CHECK-LABEL: @test_mm256_maskz_xor_pd - // CHECK: @llvm.x86.avx512.mask.xor.pd.256 + // CHECK: xor <4 x i64> %{{.*}}, %{{.*}} + // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return (__m256d) _mm256_maskz_xor_pd (__U, __A, __B); } __m128d test_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: @test_mm_mask_xor_pd - // CHECK: @llvm.x86.avx512.mask.xor.pd.128 + // CHECK: xor <2 x i64> %{{.*}}, %{{.*}} + // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return (__m128d) _mm_mask_xor_pd ( __W, __U, __A, __B); } __m128d test_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: @test_mm_maskz_xor_pd - // CHECK: @llvm.x86.avx512.mask.xor.pd.128 + // CHECK: xor <2 x i64> %{{.*}}, %{{.*}} + // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return (__m128d) _mm_maskz_xor_pd (__U, __A, __B); } __m256 test_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { // CHECK-LABEL: @test_mm256_mask_xor_ps - // CHECK: @llvm.x86.avx512.mask.xor.ps.256 + // CHECK: xor <8 x i32> %{{.*}}, %{{.*}} + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return (__m256) _mm256_mask_xor_ps ( __W, __U, __A, __B); } __m256 test_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) { // CHECK-LABEL: @test_mm256_maskz_xor_ps - // CHECK: @llvm.x86.avx512.mask.xor.ps.256 + // CHECK: xor <8 x i32> %{{.*}}, %{{.*}} + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return (__m256) _mm256_maskz_xor_ps (__U, __A, __B); } __m128 test_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { // CHECK-LABEL: @test_mm_mask_xor_ps - // CHECK: @llvm.x86.avx512.mask.xor.ps.128 - return (__m128) _mm_mask_xor_ps ( __W, __U, __A, __B); + // CHECK: xor <4 x i32> %{{.*}}, %{{.*}} + // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + return (__m128) _mm_mask_xor_ps ( __W, __U, __A, __B); } __m128 test_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) { // CHECK-LABEL: @test_mm_maskz_xor_ps - // CHECK: @llvm.x86.avx512.mask.xor.ps.128 + // CHECK: xor <4 x i32> %{{.*}}, %{{.*}} + // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return (__m128) _mm_maskz_xor_ps (__U, __A, __B); } __m256d test_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { // CHECK-LABEL: @test_mm256_mask_or_pd - // CHECK: @llvm.x86.avx512.mask.or.pd.256 + // CHECK: or <4 x i64> %{{.*}}, %{{.*}} + // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return (__m256d) _mm256_mask_or_pd ( __W, __U, __A, __B); } __m256d test_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) { // CHECK-LABEL: @test_mm256_maskz_or_pd - // CHECK: @llvm.x86.avx512.mask.or.pd.256 + // CHECK: or <4 x i64> %{{.*}}, %{{.*}} + // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} return (__m256d) _mm256_maskz_or_pd (__U, __A, __B); } __m128d test_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: @test_mm_mask_or_pd - // CHECK: @llvm.x86.avx512.mask.or.pd.128 + // CHECK: or <2 x i64> %{{.*}}, %{{.*}} + // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return (__m128d) _mm_mask_or_pd ( __W, __U, __A, __B); } __m128d test_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: @test_mm_maskz_or_pd - // CHECK: @llvm.x86.avx512.mask.or.pd.128 + // CHECK: or <2 x i64> %{{.*}}, %{{.*}} + // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return (__m128d) _mm_maskz_or_pd (__U, __A, __B); } __m256 test_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { // CHECK-LABEL: @test_mm256_mask_or_ps - // CHECK: @llvm.x86.avx512.mask.or.ps.256 + // CHECK: or <8 x i32> %{{.*}}, %{{.*}} + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return (__m256) _mm256_mask_or_ps ( __W, __U, __A, __B); } __m256 test_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) { // CHECK-LABEL: @test_mm256_maskz_or_ps - // CHECK: @llvm.x86.avx512.mask.or.ps.256 + // CHECK: or <8 x i32> %{{.*}}, %{{.*}} + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return (__m256) _mm256_maskz_or_ps (__U, __A, __B); } __m128 test_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { // CHECK-LABEL: @test_mm_mask_or_ps - // CHECK: @llvm.x86.avx512.mask.or.ps.128 + // CHECK: or <4 x i32> %{{.*}}, %{{.*}} + // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return (__m128) _mm_mask_or_ps ( __W, __U, __A, __B); } __m128 test_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) { // CHECK-LABEL: @test_mm_maskz_or_ps - // CHECK: @llvm.x86.avx512.mask.or.ps.128 + // CHECK: or <4 x i32> %{{.*}}, %{{.*}} + // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return (__m128) _mm_maskz_or_ps(__U, __A, __B); } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits