Author: ctopper Date: Thu Jun 28 22:43:33 2018 New Revision: 335945 URL: http://llvm.org/viewvc/llvm-project?rev=335945&view=rev Log: [X86] Remove masking from the avx512 packed sqrt builtins. Use select builtins instead.
Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/Headers/avx512fintrin.h cfe/trunk/lib/Sema/SemaChecking.cpp cfe/trunk/test/CodeGen/avx512f-builtins.c Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=335945&r1=335944&r2=335945&view=diff ============================================================================== --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Thu Jun 28 22:43:33 2018 @@ -866,8 +866,8 @@ TARGET_BUILTIN(__builtin_ia32_rdpkru, "U TARGET_BUILTIN(__builtin_ia32_wrpkru, "vUi", "n", "pku") // AVX-512 -TARGET_BUILTIN(__builtin_ia32_sqrtpd512_mask, "V8dV8dV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_sqrtps512_mask, "V16fV16fV16fUsIi", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_sqrtpd512, "V8dV8dIi", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_sqrtps512, "V16fV16fIi", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_rsqrt14sd_mask, "V2dV2dV2dV2dUc", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_rsqrt14ss_mask, "V4fV4fV4fV4fUc", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_rsqrt14pd512_mask, "V8dV8dV8dUc", "nc", "avx512f") Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=335945&r1=335944&r2=335945&view=diff ============================================================================== --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu Jun 28 22:43:33 2018 @@ -9889,24 +9889,22 @@ Value *CodeGenFunction::EmitX86BuiltinEx case X86::BI__builtin_ia32_sqrtpd256: case X86::BI__builtin_ia32_sqrtpd: case X86::BI__builtin_ia32_sqrtps256: - case X86::BI__builtin_ia32_sqrtps: { - Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType()); - return Builder.CreateCall(F, {Ops[0]}); - } - case X86::BI__builtin_ia32_sqrtps512_mask: - case X86::BI__builtin_ia32_sqrtpd512_mask: { - unsigned CC = cast<llvm::ConstantInt>(Ops[3])->getZExtValue(); - // Support only if the rounding mode is 4 (AKA CUR_DIRECTION), - // otherwise keep the intrinsic. - if (CC != 4) { - Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtps512_mask ? - Intrinsic::x86_avx512_mask_sqrt_ps_512 : - Intrinsic::x86_avx512_mask_sqrt_pd_512; - return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); + case X86::BI__builtin_ia32_sqrtps: + case X86::BI__builtin_ia32_sqrtps512: + case X86::BI__builtin_ia32_sqrtpd512: { + if (Ops.size() == 2) { + unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); + // Support only if the rounding mode is 4 (AKA CUR_DIRECTION), + // otherwise keep the intrinsic. + if (CC != 4) { + Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtps512 ? + Intrinsic::x86_avx512_sqrt_ps_512 : + Intrinsic::x86_avx512_sqrt_pd_512; + return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); + } } Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType()); - return EmitX86Select(*this, Ops[2], Builder.CreateCall(F, {Ops[0]}), - Ops[1]); + return Builder.CreateCall(F, Ops[0]); } case X86::BI__builtin_ia32_pabsb128: case X86::BI__builtin_ia32_pabsw128: Modified: cfe/trunk/lib/Headers/avx512fintrin.h URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=335945&r1=335944&r2=335945&view=diff ============================================================================== --- cfe/trunk/lib/Headers/avx512fintrin.h (original) +++ cfe/trunk/lib/Headers/avx512fintrin.h Thu Jun 28 22:43:33 2018 @@ -1492,89 +1492,76 @@ _mm512_mask_mullox_epi64(__m512i __W, __ (__v8di)__W); } +#define _mm512_sqrt_round_pd(A, R) \ + (__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)) + #define _mm512_mask_sqrt_round_pd(W, U, A, R) \ - (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \ - (__v8df)(__m512d)(W), (__mmask8)(U), \ - (int)(R)) + (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_sqrt_round_pd((A), (R)), \ + (__v8df)(__m512d)(W)) #define _mm512_maskz_sqrt_round_pd(U, A, R) \ - (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \ - (__v8df)_mm512_setzero_pd(), \ - (__mmask8)(U), (int)(R)) - -#define _mm512_sqrt_round_pd(A, R) \ - (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \ - (__v8df)_mm512_undefined_pd(), \ - (__mmask8)-1, (int)(R)) + (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ + (__v8df)_mm512_sqrt_round_pd((A), (R)), \ + (__v8df)_mm512_setzero_pd()) static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_sqrt_pd(__m512d __a) +_mm512_sqrt_pd(__m512d __A) { - return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a, - (__v8df) _mm512_setzero_pd (), - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A, + _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) { - return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, - (__v8df) __W, - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512d)__builtin_ia32_selectpd_512(__U, + (__v8df)_mm512_sqrt_pd(__A), + (__v8df)__W); } static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) { - return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, - (__v8df) - _mm512_setzero_pd (), - (__mmask8) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512d)__builtin_ia32_selectpd_512(__U, + (__v8df)_mm512_sqrt_pd(__A), + (__v8df)_mm512_setzero_pd()); } +#define _mm512_sqrt_round_ps(A, R) \ + (__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)) + #define _mm512_mask_sqrt_round_ps(W, U, A, R) \ - (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)(__m512)(W), (__mmask16)(U), \ - (int)(R)) + (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + (__v16sf)_mm512_sqrt_round_ps((A), (R)), \ + (__v16sf)(__m512)(W)) #define _mm512_maskz_sqrt_round_ps(U, A, R) \ - (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_setzero_ps(), \ - (__mmask16)(U), (int)(R)) - -#define _mm512_sqrt_round_ps(A, R) \ - (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \ - (__v16sf)_mm512_undefined_ps(), \ - (__mmask16)-1, (int)(R)) + (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ + (__v16sf)_mm512_sqrt_round_ps((A), (R)), \ + (__v16sf)_mm512_setzero_ps()) static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_sqrt_ps(__m512 __a) +_mm512_sqrt_ps(__m512 __A) { - return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a, - (__v16sf) _mm512_setzero_ps (), - (__mmask16) -1, - _MM_FROUND_CUR_DIRECTION); + return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A, + _MM_FROUND_CUR_DIRECTION); } static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) { - return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A, - (__v16sf) __W, - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512)__builtin_ia32_selectps_512(__U, + (__v16sf)_mm512_sqrt_ps(__A), + (__v16sf)__W); } static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A) { - return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__A, - (__v16sf) _mm512_setzero_ps (), - (__mmask16) __U, - _MM_FROUND_CUR_DIRECTION); + return (__m512)__builtin_ia32_selectps_512(__U, + (__v16sf)_mm512_sqrt_ps(__A), + (__v16sf)_mm512_setzero_ps()); } static __inline__ __m512d __DEFAULT_FN_ATTRS Modified: cfe/trunk/lib/Sema/SemaChecking.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=335945&r1=335944&r2=335945&view=diff ============================================================================== --- cfe/trunk/lib/Sema/SemaChecking.cpp (original) +++ cfe/trunk/lib/Sema/SemaChecking.cpp Thu Jun 28 22:43:33 2018 @@ -2373,6 +2373,8 @@ bool Sema::CheckX86BuiltinRoundingOrSAE( case X86::BI__builtin_ia32_vcvtss2si64: case X86::BI__builtin_ia32_vcvtss2usi32: case X86::BI__builtin_ia32_vcvtss2usi64: + case X86::BI__builtin_ia32_sqrtpd512: + case X86::BI__builtin_ia32_sqrtps512: ArgNum = 1; HasRC = true; break; @@ -2404,8 +2406,6 @@ bool Sema::CheckX86BuiltinRoundingOrSAE( case X86::BI__builtin_ia32_cvtqq2ps512_mask: case X86::BI__builtin_ia32_cvtuqq2pd512_mask: case X86::BI__builtin_ia32_cvtuqq2ps512_mask: - case X86::BI__builtin_ia32_sqrtpd512_mask: - case X86::BI__builtin_ia32_sqrtps512_mask: ArgNum = 3; HasRC = true; break; Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=335945&r1=335944&r2=335945&view=diff ============================================================================== --- cfe/trunk/test/CodeGen/avx512f-builtins.c (original) +++ cfe/trunk/test/CodeGen/avx512f-builtins.c Thu Jun 28 22:43:33 2018 @@ -30,26 +30,26 @@ __m512d test_mm512_maskz_sqrt_pd (__mmas __m512d test_mm512_mask_sqrt_round_pd(__m512d __W,__mmask8 __U,__m512d __A) { // CHECK-LABEL: @test_mm512_mask_sqrt_round_pd - // CHECK: call <8 x double> @llvm.sqrt.v8f64(<8 x double> %{{.*}}) + // CHECK: call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %{{.*}}, i32 8) // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} - return _mm512_mask_sqrt_round_pd(__W,__U,__A,_MM_FROUND_CUR_DIRECTION); + return _mm512_mask_sqrt_round_pd(__W,__U,__A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_maskz_sqrt_round_pd(__mmask8 __U,__m512d __A) { // CHECK-LABEL: @test_mm512_maskz_sqrt_round_pd - // CHECK: call <8 x double> @llvm.sqrt.v8f64(<8 x double> %{{.*}}) + // CHECK: call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %{{.*}}, i32 8) // CHECK: bitcast i8 %{{.*}} to <8 x i1> // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> {{.*}} - return _mm512_maskz_sqrt_round_pd(__U,__A,_MM_FROUND_CUR_DIRECTION); + return _mm512_maskz_sqrt_round_pd(__U,__A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_sqrt_round_pd(__m512d __A) { // CHECK-LABEL: @test_mm512_sqrt_round_pd - // CHECK: call <8 x double> @llvm.sqrt.v8f64(<8 x double> %{{.*}}) - return _mm512_sqrt_round_pd(__A,_MM_FROUND_CUR_DIRECTION); + // CHECK: call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %{{.*}}, i32 8) + return _mm512_sqrt_round_pd(__A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_sqrt_ps(__m512 a) @@ -80,26 +80,26 @@ __m512 test_mm512_maskz_sqrt_ps( __mmask __m512 test_mm512_mask_sqrt_round_ps(__m512 __W,__mmask16 __U,__m512 __A) { // CHECK-LABEL: @test_mm512_mask_sqrt_round_ps - // CHECK: call <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.*}}) + // CHECK: call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %{{.*}}, i32 8) // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} - return _mm512_mask_sqrt_round_ps(__W,__U,__A,_MM_FROUND_CUR_DIRECTION); + return _mm512_mask_sqrt_round_ps(__W,__U,__A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_maskz_sqrt_round_ps(__mmask16 __U,__m512 __A) { // CHECK-LABEL: @test_mm512_maskz_sqrt_round_ps - // CHECK: call <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.*}}) + // CHECK: call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %{{.*}}, i32 8) // CHECK: bitcast i16 %{{.*}} to <16 x i1> // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> {{.*}} - return _mm512_maskz_sqrt_round_ps(__U,__A,_MM_FROUND_CUR_DIRECTION); + return _mm512_maskz_sqrt_round_ps(__U,__A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512 test_mm512_sqrt_round_ps(__m512 __A) { // CHECK-LABEL: @test_mm512_sqrt_round_ps - // CHECK: call <16 x float> @llvm.sqrt.v16f32(<16 x float> %{{.*}}) - return _mm512_sqrt_round_ps(__A,_MM_FROUND_CUR_DIRECTION); + // CHECK: call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %{{.*}}, i32 8) + return _mm512_sqrt_round_ps(__A,_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); } __m512d test_mm512_rsqrt14_pd(__m512d a) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits