This revision was automatically updated to reflect the committed changes. Closed by commit rL312135: [X86] Implement broadcastf32x2 and broadcasti32x2 intrinsics using… (authored by ctopper).
Changed prior to commit: https://reviews.llvm.org/D37287?vs=113199&id=113275#toc Repository: rL LLVM https://reviews.llvm.org/D37287 Files: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/Headers/avx512dqintrin.h cfe/trunk/lib/Headers/avx512vldqintrin.h cfe/trunk/test/CodeGen/avx512dq-builtins.c cfe/trunk/test/CodeGen/avx512vldq-builtins.c
Index: cfe/trunk/lib/Headers/avx512dqintrin.h =================================================================== --- cfe/trunk/lib/Headers/avx512dqintrin.h +++ cfe/trunk/lib/Headers/avx512dqintrin.h @@ -973,25 +973,26 @@ static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_broadcast_f32x2 (__m128 __A) { - return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A, - (__v16sf)_mm512_undefined_ps(), - (__mmask16) -1); + return (__m512)__builtin_shufflevector((__v4sf)__A, + (__v4sf)_mm_undefined_ps(), + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1); } static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A) { - return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A, - (__v16sf) - __O, __M); + return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, + (__v16sf)_mm512_broadcast_f32x2(__A), + (__v16sf)__O); } static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A) { - return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A, - (__v16sf)_mm512_setzero_ps (), - __M); + return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, + (__v16sf)_mm512_broadcast_f32x2(__A), + (__v16sf)_mm512_setzero_ps()); } static __inline__ __m512 __DEFAULT_FN_ATTRS @@ -1044,25 +1045,26 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcast_i32x2 (__m128i __A) { - return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A, - (__v16si)_mm512_setzero_si512(), - (__mmask16) -1); + return (__m512i)__builtin_shufflevector((__v4si)__A, + (__v4si)_mm_undefined_si128(), + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A) { - return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A, - (__v16si) - __O, __M); + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, + (__v16si)_mm512_broadcast_i32x2(__A), + (__v16si)__O); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A) { - return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) __A, - (__v16si)_mm512_setzero_si512 (), - __M); + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, + (__v16si)_mm512_broadcast_i32x2(__A), + (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS Index: cfe/trunk/lib/Headers/avx512vldqintrin.h =================================================================== --- cfe/trunk/lib/Headers/avx512vldqintrin.h +++ cfe/trunk/lib/Headers/avx512vldqintrin.h @@ -978,25 +978,25 @@ static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_broadcast_f32x2 (__m128 __A) { - return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, - (__v8sf)_mm256_undefined_ps(), - (__mmask8) -1); + return (__m256)__builtin_shufflevector((__v4sf)__A, + (__v4sf)_mm_undefined_ps(), + 0, 1, 0, 1, 0, 1, 0, 1); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A) { - return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, - (__v8sf) __O, - __M); + return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, + (__v8sf)_mm256_broadcast_f32x2(__A), + (__v8sf)__O); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A) { - return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, - (__v8sf) _mm256_setzero_ps (), - __M); + return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, + (__v8sf)_mm256_broadcast_f32x2(__A), + (__v8sf)_mm256_setzero_ps()); } static __inline__ __m256d __DEFAULT_FN_ATTRS @@ -1025,49 +1025,49 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_broadcast_i32x2 (__m128i __A) { - return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A, - (__v4si)_mm_undefined_si128(), - (__mmask8) -1); + return (__m128i)__builtin_shufflevector((__v4si)__A, + (__v4si)_mm_undefined_si128(), + 0, 1, 0, 1); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A) { - return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A, - (__v4si) __O, - __M); + return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, + (__v4si)_mm_broadcast_i32x2(__A), + (__v4si)__O); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) { - return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A, - (__v4si) _mm_setzero_si128 (), - __M); + return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, + (__v4si)_mm_broadcast_i32x2(__A), + (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_broadcast_i32x2 (__m128i __A) { - return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A, - (__v8si)_mm256_undefined_si256(), - (__mmask8) -1); + return (__m256i)__builtin_shufflevector((__v4si)__A, + (__v4si)_mm_undefined_si128(), + 0, 1, 0, 1, 0, 1, 0, 1); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A) { - return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A, - (__v8si) __O, - __M); + return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, + (__v8si)_mm256_broadcast_i32x2(__A), + (__v8si)__O); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) { - return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A, - (__v8si) _mm256_setzero_si256 (), - __M); + return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, + (__v8si)_mm256_broadcast_i32x2(__A), + (__v8si)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS Index: cfe/trunk/include/clang/Basic/BuiltinsX86.def =================================================================== --- cfe/trunk/include/clang/Basic/BuiltinsX86.def +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def @@ -1596,11 +1596,6 @@ TARGET_BUILTIN(__builtin_ia32_broadcastmb256, "V4LLiUc","","avx512cd,avx512vl") TARGET_BUILTIN(__builtin_ia32_broadcastmw128, "V4iUs","","avx512cd,avx512vl") TARGET_BUILTIN(__builtin_ia32_broadcastmw256, "V8iUs","","avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_broadcastf32x2_512_mask, "V16fV4fV16fUs","","avx512dq") -TARGET_BUILTIN(__builtin_ia32_broadcasti32x2_512_mask, "V16iV4iV16iUs","","avx512dq") -TARGET_BUILTIN(__builtin_ia32_broadcastf32x2_256_mask, "V8fV4fV8fUc","","avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_broadcasti32x2_128_mask, "V4iV4iV4iUc","","avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_broadcasti32x2_256_mask, "V8iV4iV8iUc","","avx512dq,avx512vl") TARGET_BUILTIN(__builtin_ia32_pbroadcastw512_gpr_mask, "V32shV32sUi","","avx512bw") TARGET_BUILTIN(__builtin_ia32_pbroadcastw256_gpr_mask, "V16shV16sUs","","avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_pbroadcastw128_gpr_mask, "V8ssV8sUc","","avx512bw,avx512vl") Index: cfe/trunk/test/CodeGen/avx512vldq-builtins.c =================================================================== --- cfe/trunk/test/CodeGen/avx512vldq-builtins.c +++ cfe/trunk/test/CodeGen/avx512vldq-builtins.c @@ -909,19 +909,21 @@ __m256 test_mm256_broadcast_f32x2(__m128 __A) { // CHECK-LABEL: @test_mm256_broadcast_f32x2 - // CHECK: @llvm.x86.avx512.mask.broadcastf32x2 + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> return _mm256_broadcast_f32x2(__A); } __m256 test_mm256_mask_broadcast_f32x2(__m256 __O, __mmask8 __M, __m128 __A) { // CHECK-LABEL: @test_mm256_mask_broadcast_f32x2 - // CHECK: @llvm.x86.avx512.mask.broadcastf32x2 + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_mask_broadcast_f32x2(__O, __M, __A); } __m256 test_mm256_maskz_broadcast_f32x2(__mmask8 __M, __m128 __A) { // CHECK-LABEL: @test_mm256_maskz_broadcast_f32x2 - // CHECK: @llvm.x86.avx512.mask.broadcastf32x2 + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm256_maskz_broadcast_f32x2(__M, __A); } @@ -947,37 +949,41 @@ __m128i test_mm_broadcast_i32x2(__m128i __A) { // CHECK-LABEL: @test_mm_broadcast_i32x2 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x2 + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 0, i32 1> return _mm_broadcast_i32x2(__A); } __m128i test_mm_mask_broadcast_i32x2(__m128i __O, __mmask8 __M, __m128i __A) { // CHECK-LABEL: @test_mm_mask_broadcast_i32x2 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x2 + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 0, i32 1> + // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_broadcast_i32x2(__O, __M, __A); } __m128i test_mm_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) { // CHECK-LABEL: @test_mm_maskz_broadcast_i32x2 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x2 + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 0, i32 1> + // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_broadcast_i32x2(__M, __A); } __m256i test_mm256_broadcast_i32x2(__m128i __A) { // CHECK-LABEL: @test_mm256_broadcast_i32x2 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x2 + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> return _mm256_broadcast_i32x2(__A); } __m256i test_mm256_mask_broadcast_i32x2(__m256i __O, __mmask8 __M, __m128i __A) { // CHECK-LABEL: @test_mm256_mask_broadcast_i32x2 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x2 + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_broadcast_i32x2(__O, __M, __A); } __m256i test_mm256_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) { // CHECK-LABEL: @test_mm256_maskz_broadcast_i32x2 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x2 + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_broadcast_i32x2(__M, __A); } Index: cfe/trunk/test/CodeGen/avx512dq-builtins.c =================================================================== --- cfe/trunk/test/CodeGen/avx512dq-builtins.c +++ cfe/trunk/test/CodeGen/avx512dq-builtins.c @@ -949,19 +949,21 @@ __m512 test_mm512_broadcast_f32x2(__m128 __A) { // CHECK-LABEL: @test_mm512_broadcast_f32x2 - // CHECK: @llvm.x86.avx512.mask.broadcastf32x2 + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> return _mm512_broadcast_f32x2(__A); } __m512 test_mm512_mask_broadcast_f32x2(__m512 __O, __mmask16 __M, __m128 __A) { // CHECK-LABEL: @test_mm512_mask_broadcast_f32x2 - // CHECK: @llvm.x86.avx512.mask.broadcastf32x2 + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> + // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_broadcast_f32x2(__O, __M, __A); } __m512 test_mm512_maskz_broadcast_f32x2(__mmask16 __M, __m128 __A) { // CHECK-LABEL: @test_mm512_maskz_broadcast_f32x2 - // CHECK: @llvm.x86.avx512.mask.broadcastf32x2 + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> + // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_broadcast_f32x2(__M, __A); } @@ -1007,19 +1009,21 @@ __m512i test_mm512_broadcast_i32x2(__m128i __A) { // CHECK-LABEL: @test_mm512_broadcast_i32x2 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x2 + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> return _mm512_broadcast_i32x2(__A); } __m512i test_mm512_mask_broadcast_i32x2(__m512i __O, __mmask16 __M, __m128i __A) { // CHECK-LABEL: @test_mm512_mask_broadcast_i32x2 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x2 + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_broadcast_i32x2(__O, __M, __A); } __m512i test_mm512_maskz_broadcast_i32x2(__mmask16 __M, __m128i __A) { // CHECK-LABEL: @test_mm512_maskz_broadcast_i32x2 - // CHECK: @llvm.x86.avx512.mask.broadcasti32x2 + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_broadcast_i32x2(__M, __A); }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits